diff --git a/.asf.yaml b/.asf.yaml
index 99fd6fac22c76..0588a300a5ca8 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -54,72 +54,14 @@ github:
    # needs to be updated as part of the release process
    # .asf.yaml doesn't support wildcard branch protection rules, only exact branch names
    # https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#branch-protection
-    # Keeping set of protected branches for future releases
-    # Meanwhile creating a prerelease script that will update the branch protection names
-    # automatically. Keep track on it https://github.com/apache/datafusion/issues/17134
+    # these branch protection blocks are autogenerated during the release process, as described in
+    # https://github.com/apache/datafusion/tree/main/dev/release#2-add-a-protection-to-release-candidate-branch
    branch-50:
      required_pull_request_reviews:
        required_approving_review_count: 1
    branch-51:
      required_pull_request_reviews:
        required_approving_review_count: 1
-    branch-52:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-53:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-54:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-55:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-56:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-57:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-58:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-59:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-60:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-61:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-62:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-63:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-64:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-65:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-66:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-67:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-68:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-69:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
-    branch-70:
-      required_pull_request_reviews:
-        required_approving_review_count: 1
  pull_requests:
    # enable updating head branches of pull requests
    allow_update_branch: true
@@ -129,3 +71,4 @@ github:
  # https://datafusion.apache.org/
  publish:
    whoami: asf-site
+
diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml
index 22d2f2187dd07..6228370c955a9 100644
--- a/.github/actions/setup-builder/action.yaml
+++ b/.github/actions/setup-builder/action.yaml
@@ -46,3 +46,17 @@ runs:
      # https://github.com/actions/checkout/issues/766
      shell: bash
      run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
+    - name: Remove unnecessary preinstalled software
+      shell: bash
+      run: |
+        echo "Disk space before cleanup:"
+        df -h
+        apt-get clean
+        # remove tool cache: about 8.5GB (github has host /opt/hostedtoolcache mounted as /__t)
+        rm -rf /__t/* || true
+        # remove Haskell runtime: about 6.3GB (host /usr/local/.ghcup)
+        rm -rf /host/usr/local/.ghcup || true
+        # remove Android library: about 7.8GB (host /usr/local/lib/android)
+        rm -rf /host/usr/local/lib/android || true
+        echo "Disk space after cleanup:"
+        df -h
\ No newline at end of file
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index f269331e83ca7..b09e82bb8602d 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -40,9 +40,9 @@ jobs:
  security_audit:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      - name: Install cargo-audit
-        uses: taiki-e/install-action@6f9c7cc51aa54b13cbcbd12f8bbf69d8ba405b4b # v2.62.47
+        uses: taiki-e/install-action@de7896b7cd1c7d181266425abbe571b5a8c757bc # v2.65.3
        with:
          tool: cargo-audit
      - name: Run audit check
diff --git a/.github/workflows/dependencies.yml b/.github/workflows/dependencies.yml
index 7e736e1a7afbf..fef65870b697d 100644
--- a/.github/workflows/dependencies.yml
+++ b/.github/workflows/dependencies.yml
@@ -44,7 +44,7 @@ jobs:
    container:
      image: amd64/rust
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          submodules: true
          fetch-depth: 1
@@ -62,7 +62,7 @@
    container:
      image: amd64/rust
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      - name: Install cargo-machete
        run: cargo install cargo-machete --version ^0.9 --locked
      - name: Detect unused dependencies
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index cc879f66cc936..1ec7c16b488f5 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -32,8 +32,9 @@ jobs:
    runs-on: ubuntu-latest
    name: Check License Header
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      - name: Install HawkEye
+        # This CI job is bound by installation time, use `--profile dev` to speed it up
        run: cargo install hawkeye --version 6.2.0 --locked --profile dev
      - name: Run license header check
        run: ci/scripts/license_header.sh
@@ -42,18 +43,25 @@
    name: Use prettier to check formatting of documents
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-      - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+      - uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
        with:
          node-version: "20"
      - name: Prettier check
-        run: |
-          # if you encounter error, rerun the command below and commit the changes
-          #
-          # ignore subproject CHANGELOG.md because they are machine generated
-          npx prettier@2.7.1 --write \
-            '{datafusion,datafusion-cli,datafusion-examples,dev,docs}/**/*.md' \
-            '!datafusion/CHANGELOG.md' \
-            README.md \
-            CONTRIBUTING.md
-          git diff --exit-code
+        # if you encounter an error, see the instructions inside the script
+        run: ci/scripts/doc_prettier_check.sh
+
+  typos:
+    name: Spell Check with Typos
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+        with:
+          persist-credentials: false
+      # Version fixed on purpose. It uses heuristics to detect typos, so upgrading
+      # it may cause checks to fail more often.
+      # We can upgrade it manually once in a while.
+ - name: Install typos-cli + run: cargo install typos-cli --locked --version 1.37.0 + - name: Run typos check + run: ci/scripts/typos_check.sh diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 588bf46aaca70..3e2c48643c366 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -32,16 +32,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout docs sources - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Checkout asf-site branch - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: asf-site path: asf-site - name: Setup Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.12" diff --git a/.github/workflows/docs_pr.yaml b/.github/workflows/docs_pr.yaml index c182f2ef85d23..81eeb4039ba97 100644 --- a/.github/workflows/docs_pr.yaml +++ b/.github/workflows/docs_pr.yaml @@ -40,12 +40,12 @@ jobs: name: Test doc build runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 - name: Setup Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.12" - name: Install doc dependencies diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 85e40731a9592..01de0d5b77a7a 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -69,7 +69,7 @@ jobs: runs-on: ubuntu-latest # note: do not use amd/rust container to preserve disk space steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -93,7 +93,7 @@ jobs: runs-on: ubuntu-latest # note: do not use amd/rust container to preserve disk space steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -137,7 +137,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -158,7 +158,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 0abf535b9741f..01e21115010fc 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -39,7 +39,7 @@ jobs: contents: read pull-requests: write steps: - - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Assign GitHub labels if: | diff --git a/.github/workflows/large_files.yml b/.github/workflows/large_files.yml index 9cbfd6030a7f6..b96b8cd4544ee 100644 --- a/.github/workflows/large_files.yml +++ b/.github/workflows/large_files.yml @@ -29,7 +29,7 @@ jobs: check-files: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 - name: Check size of new Git objects diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c57300eec0e4d..2a907ba7e5b14 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -49,13 +49,13 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: shared-key: "amd-ci-check" # this job uses it's own cache becase check has a separate cache and we need it to be fast as it blocks other jobs save-if: ${{ github.ref_name == 'main' }} @@ -77,7 +77,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -102,13 +102,13 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: save-if: false # set in linux-test shared-key: "amd-ci" @@ -139,7 +139,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -170,13 +170,13 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: save-if: false # set in linux-test shared-key: "amd-ci" @@ -209,8 +209,6 @@ jobs: run: cargo check --profile ci --no-default-features -p datafusion --features=math_expressions - name: Check datafusion (parquet) run: cargo check --profile ci --no-default-features -p datafusion --features=parquet - - name: Check datafusion (pyarrow) - run: cargo check --profile ci --no-default-features 
-p datafusion --features=pyarrow - name: Check datafusion (regex_expressions) run: cargo check --profile ci --no-default-features -p datafusion --features=regex_expressions - name: Check datafusion (recursive_protection) @@ -237,7 +235,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -271,8 +269,10 @@ jobs: runs-on: ubuntu-latest container: image: amd64/rust + volumes: + - /usr/local:/host/usr/local steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -281,7 +281,7 @@ jobs: with: rust-version: stable - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: save-if: ${{ github.ref_name == 'main' }} shared-key: "amd-ci" @@ -318,14 +318,14 @@ jobs: needs: linux-build-lib runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 - name: Setup Rust toolchain run: rustup toolchain install stable - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: save-if: false # set in linux-test shared-key: "amd-ci" @@ -349,7 +349,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -358,23 +358,10 @@ jobs: with: rust-version: stable - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: save-if: ${{ github.ref_name == 'main' }} shared-key: "amd-ci-linux-test-example" - - name: Remove unnecessary preinstalled software - run: | - echo "Disk space before cleanup:" - df -h - apt-get clean - rm -rf /__t/CodeQL - rm -rf /__t/PyPy - rm -rf /__t/Java_Temurin-Hotspot_jdk - rm -rf /__t/Python - rm -rf /__t/go - rm -rf /__t/Ruby - echo "Disk space after cleanup:" - df -h - name: Run examples run: | # test datafusion-sql examples @@ -392,7 +379,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -413,7 +400,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -425,7 +412,7 @@ jobs: name: build and run with wasm-pack runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup for wasm32 run: | rustup target add wasm32-unknown-unknown @@ -434,7 +421,7 @@ jobs: 
sudo apt-get update -qq sudo apt-get install -y -qq clang - name: Setup wasm-pack - uses: taiki-e/install-action@6f9c7cc51aa54b13cbcbd12f8bbf69d8ba405b4b # v2.62.47 + uses: taiki-e/install-action@de7896b7cd1c7d181266425abbe571b5a8c757bc # v2.65.3 with: tool: wasm-pack - name: Run tests with headless mode @@ -453,7 +440,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -500,7 +487,7 @@ jobs: --health-timeout 5s --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -524,7 +511,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -562,7 +549,7 @@ jobs: name: cargo test (macos-aarch64) runs-on: macos-14 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -572,37 +559,13 @@ jobs: shell: bash run: cargo test --profile ci --exclude datafusion-cli --workspace --lib --tests --bins --features avro,json,backtrace,integration-tests - test-datafusion-pyarrow: - name: cargo test pyarrow (amd64) - needs: linux-build-lib - runs-on: ubuntu-latest - container: - image: amd64/rust:bullseye # Use the bullseye tag image which comes with python3.9 - steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - submodules: true - fetch-depth: 1 - - name: Install PyArrow - run: | - echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV - apt-get update - apt-get install python3-pip -y - python3 -m pip install pyarrow - - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: stable - - name: Run datafusion-common tests - run: cargo test --profile ci -p datafusion-common --features=pyarrow,sql - vendor: name: Verify Vendored Code runs-on: ubuntu-latest container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -619,7 +582,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -678,7 +641,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -689,7 +652,7 @@ jobs: - name: Install Clippy run: rustup component add clippy - name: Rust Dependency Cache - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2 with: save-if: ${{ github.ref_name == 'main' }} shared-key: "amd-ci-clippy" @@ -703,7 +666,7 @@ jobs: container: image: amd64/rust steps: - - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -724,7 +687,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 1 @@ -732,7 +695,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable - - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + - uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0 with: node-version: "20" - name: Check if configs.md has been modified @@ -746,6 +709,23 @@ jobs: ./dev/update_function_docs.sh git diff --exit-code + examples-docs-check: + name: check example README is up-to-date + needs: linux-build-lib + runs-on: ubuntu-latest + container: + image: amd64/rust + + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + submodules: true + fetch-depth: 1 + + - name: Run examples docs check script + run: | + bash ci/scripts/check_examples_docs.sh + # Verify MSRV for the crates which are directly used by other projects: # - datafusion # - datafusion-substrait @@ -757,11 +737,11 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv - uses: taiki-e/install-action@6f9c7cc51aa54b13cbcbd12f8bbf69d8ba405b4b # v2.62.47 + uses: taiki-e/install-action@de7896b7cd1c7d181266425abbe571b5a8c757bc # v2.65.3 with: tool: cargo-msrv @@ -798,12 +778,4 @@ jobs: run: cargo msrv --output-format json --log-target stdout verify - name: Check datafusion-proto working-directory: datafusion/proto - run: cargo msrv --output-format json --log-target stdout verify - typos: - name: Spell Check with Typos - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - persist-credentials: false - - uses: crate-ci/typos@07d900b8fa1097806b8adb6391b0d3e0ac2fdea7 # v1.39.0 + run: cargo msrv --output-format json --log-target stdout verify \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index d5fc9287aa6a5..2aba1085b8329 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -27,7 +27,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0 + - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v10.1.1 with: stale-pr-message: "Thank you for your contribution. Unfortunately, this pull request is stale because it has been open 60 days with no activity. Please remove the stale label or comment or this will be closed in 7 days." days-before-pr-stale: 60 diff --git a/.github/workflows/take.yml b/.github/workflows/take.yml index 86dc190add1d1..ffb5f728e04c1 100644 --- a/.github/workflows/take.yml +++ b/.github/workflows/take.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-name: Assign the issue via a `take` comment
+name: Assign/unassign the issue via `take` or `untake` comment
 on:
   issue_comment:
     types: created
@@ -26,16 +26,30 @@ permissions:
 jobs:
   issue_assign:
     runs-on: ubuntu-latest
-    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
+    if: (!github.event.issue.pull_request) && (github.event.comment.body == 'take' || github.event.comment.body == 'untake')
     concurrency:
       group: ${{ github.actor }}-issue-assign
     steps:
-      - run: |
-          CODE=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -LI https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees/${{ github.event.comment.user.login }} -o /dev/null -w '%{http_code}\n' -s)
-          if [ "$CODE" -eq "204" ]
+      - name: Take or untake issue
+        env:
+          COMMENT_BODY: ${{ github.event.comment.body }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+          USER_LOGIN: ${{ github.event.comment.user.login }}
+          REPO: ${{ github.repository }}
+          TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [ "$COMMENT_BODY" == "take" ]
           then
-            echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
-            curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
-          else
-            echo "Cannot assign issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
+            CODE=$(curl -H "Authorization: token $TOKEN" -LI https://api.github.com/repos/$REPO/issues/$ISSUE_NUMBER/assignees/$USER_LOGIN -o /dev/null -w '%{http_code}\n' -s)
+            if [ "$CODE" -eq "204" ]
+            then
+              echo "Assigning issue $ISSUE_NUMBER to $USER_LOGIN"
+              curl -X POST -H "Authorization: token $TOKEN" -H "Content-Type: application/json" -d "{\"assignees\": [\"$USER_LOGIN\"]}" https://api.github.com/repos/$REPO/issues/$ISSUE_NUMBER/assignees
+            else
+              echo "Cannot assign issue $ISSUE_NUMBER to $USER_LOGIN"
+            fi
+          elif [ "$COMMENT_BODY" == "untake" ]
+          then
+            echo "Unassigning issue $ISSUE_NUMBER from $USER_LOGIN"
+            curl -X DELETE -H "Authorization: token $TOKEN" -H "Content-Type: application/json" -d "{\"assignees\": [\"$USER_LOGIN\"]}" https://api.github.com/repos/$REPO/issues/$ISSUE_NUMBER/assignees
           fi
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index f500265108ff5..2ce60805c913c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -105,6 +105,15 @@ dependencies = [
 "alloc-no-stdlib",
]
+[[package]]
+name = "alloca"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "allocator-api2"
version = "0.2.21"
@@ -184,15 +193,16 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "apache-avro"
-version = "0.20.0"
+version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7"
+checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf"
dependencies = [
 "bigdecimal",
 "bon",
- "bzip2 0.6.1",
+ "bzip2",
 "crc32fast",
 "digest",
+ "liblzma",
 "log",
 "miniz_oxide",
 "num-bigint",
@@ -207,7 +217,6 @@ dependencies = [
 "strum_macros 0.27.2",
 "thiserror",
 "uuid",
- "xz2",
 "zstd",
]
@@ -225,9 +234,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]] name = "arrow" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ "arrow-arith", "arrow-array", @@ -238,7 +247,6 @@ dependencies = [ "arrow-ipc", "arrow-json", "arrow-ord", - "arrow-pyarrow", "arrow-row", "arrow-schema", "arrow-select", @@ -249,9 +257,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ "arrow-array", "arrow-buffer", @@ -263,9 +271,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -274,7 +282,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "num-complex", "num-integer", "num-traits", @@ -282,9 +290,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ "bytes", "half", @@ -294,13 +302,14 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -315,9 +324,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ "arrow-array", "arrow-cast", @@ -330,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ "arrow-buffer", "arrow-schema", @@ -343,9 +352,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f70bb56412a007b0cfc116d15f24dda6adeed9611a213852a004cda20085a3b9" +checksum = "8b5f57c3d39d1b1b7c1376a772ea86a131e7da310aed54ebea9363124bb885e3" dependencies = [ "arrow-arith", "arrow-array", @@ -371,9 +380,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ "arrow-array", "arrow-buffer", @@ -387,9 +396,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -398,7 +407,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "lexical-core", "memchr", @@ -411,9 +420,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ "arrow-array", "arrow-buffer", @@ -422,23 +431,11 @@ dependencies = [ "arrow-select", ] -[[package]] -name = "arrow-pyarrow" -version = "57.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" -dependencies = [ - "arrow-array", - "arrow-data", - "arrow-schema", - "pyo3", -] - [[package]] name = "arrow-row" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ "arrow-array", "arrow-buffer", @@ -449,9 +446,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" dependencies = [ "bitflags 2.9.4", "serde", @@ -461,9 +458,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -475,9 +472,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" +checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ "arrow-array", "arrow-buffer", @@ -520,19 +517,15 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.19" +version = "0.4.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "07a926debf178f2d355197f9caddb08e54a9329d44748034bba349c5848cb519" dependencies = [ - "bzip2 0.5.2", - "flate2", + "compression-codecs", + "compression-core", "futures-core", - "memchr", "pin-project-lite", "tokio", - "xz2", - "zstd", - "zstd-safe", ] [[package]] @@ -552,7 +545,7 @@ checksum = 
"3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -574,7 +567,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -585,7 +578,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -611,9 +604,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.7" +version = "1.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04b37ddf8d2e9744a0b9c19ce0b78efe4795339a90b66b7bae77987092cd2e69" +checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5" dependencies = [ "aws-credential-types", "aws-runtime", @@ -641,9 +634,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.7" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799a1290207254984cb7c05245111bc77958b92a3c9bb449598044b36341cce6" +checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -676,9 +669,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.11" +version = "1.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e1ed337dabcf765ad5f2fb426f13af22d576328aaf09eac8f70953530798ec0" +checksum = "d81b5b2898f6798ad58f484856768bca817e3cd9de0974c24ae0f1113fe88f1b" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -700,9 +693,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.85.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f2c741e2e439f07b5d1b33155e246742353d82167c785a2ff547275b7e32483" +checksum = "8ee6402a36f27b52fe67661c6732d684b2635152b676aa2babbfb5204f99115d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -722,9 +715,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.87.0" +version = "1.93.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6428ae5686b18c0ee99f6f3c39d94ae3f8b42894cdc35c35d8fb2470e9db2d4c" +checksum = "a45a7f750bbd170ee3677671ad782d90b894548f4e4ae168302c57ec9de5cb3e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -744,9 +737,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.87.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5871bec9a79a3e8d928c7788d654f135dde0e71d2dd98089388bab36b37ef607" +checksum = "55542378e419558e6b1f398ca70adb0b2088077e79ad9f14eb09441f2f7b2164" dependencies = [ "aws-credential-types", "aws-runtime", @@ -767,9 +760,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.4" +version = "1.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -789,9 +782,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.5" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" dependencies = [ "futures-util", "pin-project-lite", @@ -800,15 +793,16 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.3" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", @@ -820,9 +814,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.1" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" +checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -844,27 +838,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.5" +version = "0.61.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.7" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -872,9 +866,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.2" +version = "1.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185" +checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -896,9 +890,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" +checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -913,9 +907,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.2" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01" dependencies = [ "base64-simd", "bytes", @@ -936,18 +930,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.10" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.8" +version = "1.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1026,9 +1020,9 @@ dependencies = [ [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -1055,7 +1049,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1192,9 +1186,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" +checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" dependencies = [ "bon-macros", "rustversion", @@ -1202,9 +1196,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" +checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ "darling", "ident_case", @@ -1212,7 +1206,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1235,7 +1229,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1305,9 +1299,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "bytes-utils" @@ -1319,15 +1313,6 @@ dependencies = [ "either", ] -[[package]] -name = "bzip2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" -dependencies = [ - "bzip2-sys", -] - [[package]] name = "bzip2" version = "0.6.1" @@ -1337,16 +1322,6 @@ dependencies = [ "libbz2-rs-sys", ] -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - [[package]] name = "cast" version = "0.3.0" @@ -1461,9 +1436,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -1471,9 +1446,9 @@ dependencies = [ 
[[package]] name = "clap_builder" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -1490,7 +1465,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1534,6 +1509,27 @@ dependencies = [ "unicode-width 0.2.1", ] +[[package]] +name = "compression-codecs" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34a3cbbb8b6eca96f3a5c4bf6938d5b27ced3675d69f95bb51948722870bc323" +dependencies = [ + "bzip2", + "compression-core", + "flate2", + "liblzma", + "memchr", + "zstd", + "zstd-safe", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "console" version = "0.15.11" @@ -1655,19 +1651,21 @@ dependencies = [ [[package]] name = "criterion" -version = "0.7.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" +checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" dependencies = [ + "alloca", "anes", "cast", "ciborium", - "clap 4.5.50", + "clap 4.5.53", "criterion-plot", "futures", "itertools 0.13.0", "num-traits", "oorandom", + "page_size", "plotters", "rayon", "regex", @@ -1680,9 +1678,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.6.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" +checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" dependencies = [ "cast", "itertools 0.13.0", @@ -1761,9 +1759,9 @@ dependencies = [ [[package]] name = "ctor" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ffc71fcdcdb40d6f087edddf7f8f1f8f79e6cf922f555a9ee8779752d4819bd" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" dependencies = [ "ctor-proc-macro", "dtor", @@ -1802,7 +1800,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1813,7 +1811,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1832,13 +1830,13 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.1", + "bzip2", "chrono", "criterion", "ctor", @@ -1879,6 +1877,7 @@ dependencies = [ "glob", "insta", "itertools 0.14.0", + "liblzma", "log", "nix", "object_store", @@ -1898,13 +1897,12 @@ dependencies = [ "tokio", "url", "uuid", - "xz2", "zstd", ] [[package]] name = "datafusion-benchmarks" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion", @@ -1929,7 +1927,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1952,7 +1950,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version 
= "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1970,19 +1968,18 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", "chrono", - "clap 4.5.50", + "clap 4.5.53", "ctor", "datafusion", "datafusion-common", @@ -2007,24 +2004,24 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", "arrow", "arrow-ipc", "chrono", + "criterion", "half", "hashbrown 0.14.5", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "insta", "libc", "log", "object_store", "parquet", "paste", - "pyo3", "rand 0.9.2", "recursive", "sqlparser", @@ -2034,7 +2031,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" dependencies = [ "futures", "log", @@ -2043,13 +2040,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.6.1", + "bzip2", "chrono", "criterion", "datafusion-common", @@ -2065,6 +2062,7 @@ dependencies = [ "futures", "glob", "itertools 0.14.0", + "liblzma", "log", "object_store", "rand 0.9.2", @@ -2072,13 +2070,12 @@ dependencies = [ "tokio", "tokio-util", "url", - "xz2", "zstd", ] [[package]] name = "datafusion-datasource-arrow" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -2101,7 +2098,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "50.3.0" +version = "51.0.0" dependencies = [ "apache-avro", "arrow", @@ -2120,7 +2117,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2141,7 +2138,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2161,7 +2158,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2190,11 +2187,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" [[package]] name = "datafusion-examples" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2204,11 +2201,14 @@ dependencies = [ "bytes", "dashmap", "datafusion", - "datafusion-ffi", + "datafusion-common", + "datafusion-expr", "datafusion-physical-expr-adapter", "datafusion-proto", + "datafusion-sql", "env_logger", "futures", + "insta", "log", "mimalloc", "nix", @@ -2216,6 +2216,8 @@ dependencies = [ "prost", "rand 0.9.2", "serde_json", + "strum 0.27.2", + "strum_macros 0.27.2", "tempfile", "test-utils", "tokio", @@ -2228,7 +2230,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2249,7 +2251,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2262,7 +2264,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "env_logger", - "indexmap 2.12.0", + "indexmap 2.12.1", "insta", "itertools 0.14.0", "paste", @@ -2273,18 +2275,18 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = 
"51.0.0" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "51.0.0" dependencies = [ "abi_stable", "arrow", @@ -2292,10 +2294,22 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-catalog", "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto", "datafusion-proto-common", + "datafusion-session", "doc-comment", "futures", "log", @@ -2306,7 +2320,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2314,6 +2328,7 @@ dependencies = [ "blake2", "blake3", "chrono", + "chrono-tz", "criterion", "ctor", "datafusion-common", @@ -2338,7 +2353,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2359,7 +2374,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2372,7 +2387,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2395,7 +2410,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2409,7 +2424,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2425,7 +2440,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2433,16 +2448,16 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2458,7 +2473,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-sql", "env_logger", - "indexmap 2.12.0", + "indexmap 2.12.1", "insta", "itertools 0.14.0", "log", @@ -2469,7 +2484,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2482,19 +2497,21 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "insta", "itertools 0.14.0", "parking_lot", "paste", "petgraph 0.8.3", "rand 0.9.2", + "recursive", "rstest", + "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2507,19 +2524,21 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", + "chrono", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", "itertools 0.14.0", + "parking_lot", ] 
[[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2539,19 +2558,19 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "chrono", "criterion", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", "datafusion-functions-window", @@ -2561,7 +2580,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "insta", "itertools 0.14.0", "log", @@ -2575,9 +2594,10 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", + "async-trait", "chrono", "datafusion", "datafusion-catalog", @@ -2611,7 +2631,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2623,7 +2643,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2641,7 +2661,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-trait", "datafusion-common", @@ -2653,7 +2673,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2665,7 +2685,9 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-nested", "log", + "percent-encoding", "rand 0.9.2", "sha1", "url", @@ -2673,7 +2695,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2686,7 +2708,7 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-window", "env_logger", - "indexmap 2.12.0", + "indexmap 2.12.1", "insta", "itertools 0.14.0", "log", @@ -2699,14 +2721,14 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", "bigdecimal", "bytes", "chrono", - "clap 4.5.50", + "clap 4.5.53", "datafusion", "datafusion-spark", "datafusion-substrait", @@ -2733,7 +2755,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2755,7 +2777,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "chrono", "console_error_panic_hook", @@ -2830,7 +2852,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2886,7 +2908,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2924,7 +2946,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -3078,9 +3100,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -3185,7 +3207,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -3317,7 +3339,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3368,9 +3390,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "heck" @@ -3604,7 +3626,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.0", + "windows-core", ] [[package]] @@ -3742,21 +3764,21 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] [[package]] name = "indicatif" -version = "0.18.0" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a646d946d06bedbbc4cac4c218acf4bbf2d87757a784857025f4d447e4e1cd" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ "console 0.16.1", "portable-atomic", @@ -3765,17 +3787,11 @@ dependencies = [ "web-time", ] -[[package]] -name = "indoc" -version = "2.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" - [[package]] name = "insta" -version = "1.43.2" +version = "1.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" +checksum = "b76866be74d68b1595eb8060cb9191dca9c021db2316558e52ddc5d55d41b66c" dependencies = [ "console 0.15.11", "globset", @@ -3783,6 +3799,7 @@ dependencies = [ "regex", "serde", "similar", + "tempfile", "walkdir", ] @@ -3870,7 +3887,7 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -3885,9 +3902,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -3988,6 +4005,26 @@ dependencies = [ "windows-link 0.2.0", ] +[[package]] +name = "liblzma" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" +dependencies = [ + "liblzma-sys", +] + +[[package]] +name = "liblzma-sys" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" 
+dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "libm" version = "0.2.15" @@ -4024,7 +4061,7 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ "anstream", "anstyle", - "clap 4.5.50", + "clap 4.5.53", "escape8259", ] @@ -4061,9 +4098,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -4073,24 +4110,13 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ "twox-hash", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "matchit" version = "0.8.4" @@ -4113,15 +4139,6 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mimalloc" version = "0.1.48" @@ -4417,6 +4434,16 @@ version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.12.4" @@ -4442,9 +4469,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4461,7 +4488,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", "num-bigint", "num-integer", @@ -4500,7 +4527,7 @@ dependencies = [ "regex", "regex-syntax", "structmeta", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4559,7 +4586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.12.0", + "indexmap 2.12.1", ] [[package]] @@ -4570,7 +4597,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", ] @@ -4628,7 +4655,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4701,7 +4728,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4776,7 +4803,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4847,7 +4874,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.108", + "syn 2.0.111", "tempfile", ] @@ -4861,7 +4888,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4911,67 +4938,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "pyo3" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" -dependencies = [ - "indoc", - "libc", - "memoffset", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn 2.0.108", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn 2.0.108", -] - [[package]] name = "quad-rand" version = "0.2.3" @@ -5180,7 +5146,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5220,7 +5186,7 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5248,9 +5214,9 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" +checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" [[package]] name = "regex-syntax" @@ -5402,7 +5368,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.108", + "syn 2.0.111", "unicode-ident", ] @@ -5414,7 +5380,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5618,7 +5584,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5709,7 +5675,7 @@ checksum = 
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5720,7 +5686,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5744,7 +5710,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5756,7 +5722,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5781,7 +5747,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.0", + "indexmap 2.12.1", "schemars 0.9.0", "schemars 1.0.4", "serde", @@ -5800,7 +5766,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5809,7 +5775,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "ryu", "serde", @@ -5976,7 +5942,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6024,7 +5990,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6035,7 +6001,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6084,7 +6050,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6096,7 +6062,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6130,7 +6096,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.108", + "syn 2.0.111", "typify", "walkdir", ] @@ -6154,9 +6120,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -6180,7 +6146,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6203,12 +6169,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" -[[package]] -name = "target-lexicon" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" - [[package]] name = "tempfile" version = "3.23.0" @@ -6297,7 +6257,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6420,7 +6380,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6472,9 +6432,9 @@ dependencies = [ [[package]] name = "tokio-util" 
-version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -6498,7 +6458,7 @@ version = "0.23.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "toml_datetime", "toml_parser", "winnow", @@ -6561,7 +6521,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "indexmap 2.12.0", + "indexmap 2.12.1", "pin-project-lite", "slab", "sync_wrapper", @@ -6621,14 +6581,14 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -6647,9 +6607,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -6729,7 +6689,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.108", + "syn 2.0.111", "thiserror", "unicode-ident", ] @@ -6747,7 +6707,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.108", + "syn 2.0.111", "typify-impl", ] @@ -6806,12 +6766,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" - [[package]] name = "unit-prefix" version = "0.5.1" @@ -6897,13 +6851,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -6967,9 +6921,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -6980,9 +6934,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -6993,9 +6947,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7003,34 +6957,42 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" +checksum = "25e90e66d265d3a1efc0e72a54809ab90b9c0c515915c67cdf658689d2c22c6c" dependencies = [ + "async-trait", + "cast", "js-sys", + "libm", "minicov", + "nu-ansi-term", + "num-traits", + "oorandom", + "serde", + "serde_json", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", @@ -7038,13 +7000,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" +checksum = "7150335716dce6028bead2b848e72f47b45e7b9422f64cccdc23bedca89affc1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7062,9 +7024,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -7138,7 +7100,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", - "windows-core 0.61.2", + "windows-core", "windows-future", "windows-link 0.1.3", "windows-numerics", @@ -7150,7 +7112,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ - "windows-core 0.61.2", + "windows-core", ] [[package]] @@ -7162,21 +7124,8 @@ dependencies = [ "windows-implement", "windows-interface", "windows-link 0.1.3", - "windows-result 0.3.4", - "windows-strings 0.4.2", -] - -[[package]] -name = "windows-core" -version = "0.62.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link 0.2.0", - "windows-result 0.4.0", - "windows-strings 0.5.0", + "windows-result", + "windows-strings", ] [[package]] @@ -7185,7 +7134,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "windows-core 0.61.2", + "windows-core", "windows-link 0.1.3", "windows-threading", ] @@ -7198,7 +7147,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7209,7 +7158,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7230,7 +7179,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-core 0.61.2", + "windows-core", "windows-link 0.1.3", ] @@ -7243,15 +7192,6 @@ dependencies = [ "windows-link 0.1.3", ] -[[package]] -name = "windows-result" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" -dependencies = [ - "windows-link 0.2.0", -] - [[package]] name = "windows-strings" version = "0.4.2" @@ -7261,15 +7201,6 @@ dependencies = [ "windows-link 0.1.3", ] -[[package]] -name = "windows-strings" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" -dependencies = [ - "windows-link 0.2.0", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -7490,15 +7421,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yansi" version = "1.0.1" @@ -7525,7 +7447,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] @@ -7546,7 +7468,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7566,7 +7488,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] @@ -7606,7 +7528,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f15929b4c2b00..10fc88b7057c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,7 +71,7 @@ resolver = "2" [workspace.package] authors = ["Apache DataFusion "] -edition = "2021" +edition = "2024" homepage = "https://datafusion.apache.org" license = "Apache-2.0" readme = "README.md" @@ -79,7 
+79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "50.3.0" +version = "51.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -90,83 +90,88 @@ version = "50.3.0" ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } -apache-avro = { version = "0.20", default-features = false } -arrow = { version = "57.0.0", features = [ +apache-avro = { version = "0.21", default-features = false } +arrow = { version = "57.1.0", features = [ "prettyprint", "chrono-tz", ] } -arrow-buffer = { version = "57.0.0", default-features = false } -arrow-flight = { version = "57.0.0", features = [ +arrow-buffer = { version = "57.1.0", default-features = false } +arrow-flight = { version = "57.1.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "57.0.0", default-features = false, features = [ +arrow-ipc = { version = "57.1.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "57.0.0", default-features = false } -arrow-schema = { version = "57.0.0", default-features = false } +arrow-ord = { version = "57.1.0", default-features = false } +arrow-schema = { version = "57.1.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" -bytes = "1.10" +bytes = "1.11" +bzip2 = "0.6.1" chrono = { version = "0.4.42", default-features = false } -criterion = "0.7" -ctor = "0.6.1" +criterion = "0.8" +ctor = "0.6.3" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "50.3.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "50.3.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.3.0" } -datafusion-common = { path = "datafusion/common", version = "50.3.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.3.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "50.3.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "50.3.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.3.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.3.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.3.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.3.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "50.3.0" } -datafusion-execution = { path = "datafusion/execution", version = "50.3.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "50.3.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "50.3.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "50.3.0" } -datafusion-functions = { path = "datafusion/functions", version = "50.3.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.3.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.3.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.3.0", default-features = false 
} -datafusion-functions-table = { path = "datafusion/functions-table", version = "50.3.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "50.3.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.3.0" } -datafusion-macros = { path = "datafusion/macros", version = "50.3.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "50.3.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.3.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.3.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.3.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.3.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "50.3.0" } -datafusion-proto = { path = "datafusion/proto", version = "50.3.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "50.3.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "50.3.0" } -datafusion-session = { path = "datafusion/session", version = "50.3.0" } -datafusion-spark = { path = "datafusion/spark", version = "50.3.0" } -datafusion-sql = { path = "datafusion/sql", version = "50.3.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "50.3.0" } +datafusion = { path = "datafusion/core", version = "51.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "51.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "51.0.0" } +datafusion-common = { path = "datafusion/common", version = "51.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "51.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "51.0.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "51.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "51.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "51.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "51.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "51.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "51.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "51.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "51.0.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "51.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "51.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "51.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "51.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "51.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "51.0.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", 
version = "51.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "51.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "51.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "51.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "51.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "51.0.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "51.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "51.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "51.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "51.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "51.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "51.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "51.0.0" } +datafusion-session = { path = "datafusion/session", version = "51.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "51.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "51.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "51.0.0" } doc-comment = "0.3" env_logger = "0.11" +flate2 = "1.1.5" futures = "0.3" +glob = "0.3.0" half = { version = "2.7.0", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3" } -indexmap = "2.12.0" -insta = { version = "1.43.2", features = ["glob", "filters"] } +indexmap = "2.12.1" +insta = { version = "1.45.0", features = ["glob", "filters"] } itertools = "0.14" +liblzma = { version = "0.4.4", features = ["static"] } log = "^0.4" num-traits = { version = "0.2" } object_store = { version = "0.12.4", default-features = false } parking_lot = "0.12" -parquet = { version = "57.0.0", default-features = false, features = [ +parquet = { version = "57.1.0", default-features = false, features = [ "arrow", "async", "object_store", ] } +paste = "1.0.15" pbjson = { version = "0.8.0" } pbjson-types = "0.8" # Should match arrow-flight's version of prost. 
@@ -177,11 +182,14 @@ regex = "1.12" rstest = "0.26.1" serde_json = "1" sqlparser = { version = "0.59.0", default-features = false, features = ["std", "visitor"] } +strum = "0.27.2" +strum_macros = "0.27.2" tempfile = "3" testcontainers = { version = "0.25.2", features = ["default"] } testcontainers-modules = { version = "0.13" } tokio = { version = "1.48", features = ["macros", "rt", "sync"] } url = "2.5.7" +zstd = { version = "0.13", default-features = false } [workspace.lints.clippy] # Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml) @@ -191,6 +199,8 @@ or_fun_call = "warn" unnecessary_lazy_evaluations = "warn" uninlined_format_args = "warn" inefficient_to_string = "warn" +# https://github.com/apache/datafusion/issues/18503 +needless_pass_by_value = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ diff --git a/README.md b/README.md index 5191496eaafe3..880adfb3ac392 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ [![Build Status][actions-badge]][actions-url] ![Commit Activity][commit-activity-badge] [![Open Issues][open-issues-badge]][open-issues-url] +[![Pending PRs][pending-pr-badge]][pending-pr-url] [![Discord chat][discord-badge]][discord-url] [![Linkedin][linkedin-badge]][linkedin-url] ![Crates.io MSRV][msrv-badge] @@ -39,6 +40,8 @@ [commit-activity-badge]: https://img.shields.io/github/commit-activity/m/apache/datafusion [open-issues-badge]: https://img.shields.io/github/issues-raw/apache/datafusion [open-issues-url]: https://github.com/apache/datafusion/issues +[pending-pr-badge]: https://img.shields.io/github/issues-search/apache/datafusion?query=is%3Apr+is%3Aopen+draft%3Afalse+review%3Arequired+status%3Asuccess&label=Pending%20PRs&logo=github +[pending-pr-url]: https://github.com/apache/datafusion/pulls?q=is%3Apr+is%3Aopen+draft%3Afalse+review%3Arequired+status%3Asuccess+sort%3Aupdated-desc [linkedin-badge]: https://img.shields.io/badge/Follow-Linkedin-blue [linkedin-url]: https://www.linkedin.com/company/apache-datafusion/ [msrv-badge]: https://img.shields.io/crates/msrv/datafusion?label=Min%20Rust%20Version @@ -129,7 +132,6 @@ Optional features: - `avro`: support for reading the [Apache Avro] format - `backtrace`: include backtrace information in error messages - `parquet_encryption`: support for using [Parquet Modular Encryption] -- `pyarrow`: conversions between PyArrow and DataFusion types - `serde`: enable arrow-schema's `serde` feature [apache avro]: https://avro.apache.org/ diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 870c826f55810..5f91175ca8baf 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -56,7 +56,7 @@ serde_json = { workspace = true } snmalloc-rs = { version = "0.3", optional = true } structopt = { version = "0.3", default-features = false } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tokio-util = { version = "0.7.16" } +tokio-util = { version = "0.7.17" } [dev-dependencies] datafusion-proto = { workspace = true } diff --git a/benchmarks/README.md b/benchmarks/README.md index 8fed85fa02b80..0b71628b2db12 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -119,7 +119,6 @@ You can also invoke the helper directly if you need to customise arguments furth ./benchmarks/compile_profile.py --profiles dev release --data /path/to/tpch_sf1 ``` - ## Benchmark with modified configurations ### Select join algorithm @@ -147,6 +146,19 @@ To verify that datafusion picked up your configuration, run the 
benchmarks with ## Comparing performance of main and a branch +For TPC-H: +```shell +./benchmarks/compare_tpch.sh main mybranch +``` + +For TPC-DS: +To get data in `DATA_DIR` for TPCDS, please follow the instructions in `./benchmarks/bench.sh data tpcds` +```shell +DATA_DIR=../../datafusion-benchmarks/tpcds/data/sf1/ ./benchmarks/compare_tpcds.sh main mybranch +``` + +Alternatively, you can compare manually following the example below: + ```shell git checkout main @@ -243,28 +255,11 @@ See the help for more details. You can enable `mimalloc` or `snmalloc` (to use either the mimalloc or snmalloc allocator) as features by passing them in as `--features`. For example: ```shell -cargo run --release --features "mimalloc" --bin tpch -- benchmark datafusion --iterations 3 --path ./data --format tbl --query 1 --batch-size 4096 -``` - -The benchmark program also supports CSV and Parquet input file formats and a utility is provided to convert from `tbl` -(generated by the `dbgen` utility) to CSV and Parquet. - -```bash -cargo run --release --bin tpch -- convert --input ./data --output /mnt/tpch-parquet --format parquet +cargo run --release --features "mimalloc" --bin dfbench tpch --iterations 3 --path ./data --format tbl --query 1 --batch-size 4096 ``` Or if you want to verify and run all the queries in the benchmark, you can just run `cargo test`. -#### Sorted Conversion - -The TPCH tables generated by the dbgen utility are sorted by their first column (their primary key for most tables, the `l_orderkey` column for the `lineitem` table.) - -To preserve this sorted order information during conversion (useful for benchmarking execution on pre-sorted data) include the `--sort` flag: - -```bash -cargo run --release --bin tpch -- convert --input ./data --output /mnt/tpch-sorted-parquet --format parquet --sort -``` - ### Comparing results between runs Any `dfbench` execution with `-o ` argument will produce a @@ -316,7 +311,6 @@ This will produce output like: └──────────────┴──────────────┴──────────────┴───────────────┘ ``` - # Benchmark Runner The `dfbench` program contains subcommands to run the various @@ -356,24 +350,28 @@ FLAGS: ``` # Profiling Memory Stats for each benchmark query + The `mem_profile` program wraps benchmark execution to measure memory usage statistics, such as peak RSS. It runs each benchmark query in a separate subprocess, capturing the child process’s stdout to print structured output. Subcommands supported by mem_profile are the subset of those in `dfbench`. -Currently supported benchmarks include: Clickbench, H2o, Imdb, SortTpch, Tpch +Currently supported benchmarks include: Clickbench, H2o, Imdb, SortTpch, Tpch, TPCDS Before running benchmarks, `mem_profile` automatically compiles the benchmark binary (`dfbench`) using `cargo build`. Note that the build profile used for `dfbench` is not tied to the profile used for running `mem_profile` itself. We can explicitly specify the desired build profile using the `--bench-profile` option (e.g. release-nonlto). By prebuilding the binary and running each query in a separate process, we can ensure accurate memory statistics. Currently, `mem_profile` only supports `mimalloc` as the memory allocator, since it relies on `mimalloc`'s API to collect memory statistics. -Because it runs the compiled binary directly from the target directory, make sure your working directory is the top-level datafusion/ directory, where the target/ is also located.
+Because it runs the compiled binary directly from the target directory, make sure your working directory is the top-level datafusion/ directory, where the target/ is also located. + +The benchmark subcommand (e.g., `tpch`) and all following arguments are passed directly to `dfbench`. Be sure to specify `--bench-profile` before the benchmark subcommand. -The benchmark subcommand (e.g., `tpch`) and all following arguments are passed directly to `dfbench`. Be sure to specify `--bench-profile` before the benchmark subcommand. -Example: +Example: + ```shell datafusion$ cargo run --profile release-nonlto --bin mem_profile -- --bench-profile release-nonlto tpch --path benchmarks/data/tpch_sf1 --partitions 4 --format parquet ``` + Example Output: + ``` Query Time (ms) Peak RSS Peak Commit Major Page Faults ---------------------------------------------------------------- @@ -402,19 +400,21 @@ Query Time (ms) Peak RSS Peak Commit Major Page Faults ``` ## Reported Metrics + When running benchmarks, `mem_profile` collects several memory-related statistics using the mimalloc API: -- Peak RSS (Resident Set Size): -The maximum amount of physical memory used by the process. -This is a process-level metric collected via OS-specific mechanisms and is not mimalloc-specific. +- Peak RSS (Resident Set Size): + The maximum amount of physical memory used by the process. + This is a process-level metric collected via OS-specific mechanisms and is not mimalloc-specific. - Peak Commit: -The peak amount of memory committed by the allocator (i.e., total virtual memory reserved). -This is mimalloc-specific. It gives a more allocator-aware view of memory usage than RSS. + The peak amount of memory committed by the allocator (i.e., total virtual memory reserved). + This is mimalloc-specific. It gives a more allocator-aware view of memory usage than RSS. - Major Page Faults: -The number of major page faults triggered during execution. -This metric is obtained from the operating system and is not mimalloc-specific. + The number of major page faults triggered during execution. + This metric is obtained from the operating system and is not mimalloc-specific. + # Writing a new benchmark ## Creating or downloading data outside of the benchmark @@ -603,6 +603,34 @@ This benchmarks is derived from the [TPC-H][1] version [2]: https://github.com/databricks/tpch-dbgen.git, [2.17.1]: https://www.tpc.org/tpc_documents_current_versions/pdf/tpc-h_v2.17.1.pdf +## TPCDS + +Run the tpcds benchmark. + +For data, please clone the `datafusion-benchmarks` repo, which contains predefined parquet data at SF1. + +```shell +git clone https://github.com/apache/datafusion-benchmarks +``` + +Then run the benchmark with the following command: + +```shell +DATA_DIR=../../datafusion-benchmarks/tpcds/data/sf1/ ./benchmarks/bench.sh run tpcds +``` + +Alternatively, benchmark a specific query: + +```shell +DATA_DIR=../../datafusion-benchmarks/tpcds/data/sf1/ ./benchmarks/bench.sh run tpcds 30 +``` + +For more help: + +```shell +cargo run --release --bin dfbench -- tpcds --help +``` + ## External Aggregation Run the benchmark for aggregations with limited memory. @@ -762,7 +790,7 @@ Different queries are included to test nested loop joins under various workloads ## Hash Join -This benchmark focuses on the performance of queries with nested hash joins, minimizing other overheads such as scanning data sources or evaluating predicates.
+This benchmark focuses on the performance of queries with hash joins, minimizing other overheads such as scanning data sources or evaluating predicates. Several queries are included to test hash joins under various workloads. @@ -774,6 +802,19 @@ Several queries are included to test hash joins under various workloads. ./bench.sh run hj ``` +## Sort Merge Join + +This benchmark focuses on the performance of queries with sort merge joins, minimizing other overheads such as scanning data sources or evaluating predicates. + +Several queries are included to test sort merge joins under various workloads. + +### Example Run + +```bash +# No need to generate data: this benchmark uses table function `range()` as the data source + +./bench.sh run smj +``` ## Cancellation Test performance of cancelling queries. @@ -804,3 +845,41 @@ Getting results... cancelling thread done dropping runtime in 83.531417ms ``` + +## Sorted Data Benchmarks + +### Data Sorted ClickBench + +Benchmark for queries on pre-sorted data to test sort order optimization. +This benchmark uses a subset of the ClickBench dataset (hits.parquet, ~14GB) that has been pre-sorted by the EventTime column. The queries are designed to test DataFusion's performance when the data is already sorted, as is common in timeseries workloads. + +The benchmark includes queries that: +- Scan pre-sorted data with ORDER BY clauses that match the sort order +- Test reverse scans on sorted data +- Verify the performance results + +#### Generating Sorted Data + +The sorted dataset is automatically generated from the ClickBench partitioned dataset. You can configure the memory used during the sorting process with the `DATAFUSION_MEMORY_GB` environment variable. The default memory limit is 12GB. +```bash +./bench.sh data data_sorted_clickbench +``` + +To create the sorted dataset, for example with 16GB of memory, run: + +```bash +DATAFUSION_MEMORY_GB=16 ./bench.sh data data_sorted_clickbench +``` + +This command will: +1. Download the ClickBench partitioned dataset if not present +2. Sort hits.parquet by EventTime in ascending order +3. Save the sorted file as hits_sorted.parquet + +#### Running the Benchmark + +```bash +./bench.sh run data_sorted_clickbench +``` + +This runs queries against the pre-sorted dataset with the `--sorted-by EventTime` flag, which informs DataFusion that the data is pre-sorted, allowing it to optimize away redundant sort operations.
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index dbfd319dd9ad4..d5fa52d7f00ee 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -87,6 +87,9 @@ tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), tpch_csv10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single csv file per table, hash join tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory +# TPC-DS Benchmarks +tpcds: TPCDS inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table, hash join + # Extended TPC-H Benchmarks sort_tpch: Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=1) sort_tpch10: Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=10) @@ -99,6 +102,9 @@ clickbench_partitioned: ClickBench queries against partitioned (100 files) parqu clickbench_pushdown: ClickBench queries against partitioned (100 files) parquet w/ filter_pushdown enabled clickbench_extended: ClickBench \"inspired\" queries against a single parquet (DataFusion specific) +# Sorted Data Benchmarks (ORDER BY Optimization) +clickbench_sorted: ClickBench queries on pre-sorted data using prefer_existing_sort (tests sort elimination optimization) + # H2O.ai Benchmarks (Group By, Join, Window) h2o_small: h2oai benchmark with small dataset (1e7 rows) for groupby, default file format is csv h2o_medium: h2oai benchmark with medium dataset (1e8 rows) for groupby, default file format is csv @@ -126,6 +132,7 @@ imdb: Join Order Benchmark (JOB) using the IMDB dataset conver cancellation: How long cancelling a query takes nlj: Benchmark for simple nested loop joins, testing various join scenarios hj: Benchmark for simple hash joins, testing various join scenarios +smj: Benchmark for simple sort merge joins, testing various join scenarios compile_profile: Compile and execute TPC-H across selected Cargo profiles, reporting timing and binary size @@ -189,8 +196,8 @@ main() { echo "***************************" case "$BENCHMARK" in all) - data_tpch "1" - data_tpch "10" + data_tpch "1" "parquet" + data_tpch "10" "parquet" data_h2o "SMALL" data_h2o "MEDIUM" data_h2o "BIG" @@ -203,18 +210,25 @@ main() { # nlj uses range() function, no data generation needed ;; tpch) - data_tpch "1" + data_tpch "1" "parquet" ;; tpch_mem) - # same data as for tpch - data_tpch "1" + data_tpch "1" "parquet" + ;; + tpch_csv) + data_tpch "1" "csv" ;; tpch10) - data_tpch "10" + data_tpch "10" "parquet" ;; tpch_mem10) - # same data as for tpch10 - data_tpch "10" + data_tpch "10" "parquet" + ;; + tpch_csv10) + data_tpch "10" "csv" + ;; + tpcds) + data_tpcds ;; clickbench_1) data_clickbench_1 @@ -289,19 +303,19 @@ main() { ;; external_aggr) # same data as for tpch - data_tpch "1" + data_tpch "1" "parquet" ;; sort_tpch) # same data as for tpch - data_tpch "1" + data_tpch "1" "parquet" ;; sort_tpch10) # same data as for tpch10 - data_tpch "10" + data_tpch "10" "parquet" ;; topk_tpch) # same data as for tpch - data_tpch "1" + data_tpch "1" "parquet" ;; nlj) # nlj uses range() function, no data generation needed @@ -311,8 +325,15 @@ main() { # hj uses range() function, no data generation needed echo "HJ benchmark does not require data generation" ;; + smj) + # smj uses range() function, no data generation needed + echo "SMJ benchmark does not require data generation" + ;; compile_profile) - data_tpch "1" + data_tpch "1" "parquet" + ;; + clickbench_sorted) + clickbench_sorted ;; *) echo "Error: unknown benchmark '$BENCHMARK' for data generation" @@ -384,6 
+405,8 @@ main() { run_external_aggr run_nlj run_hj + run_tpcds + run_smj ;; tpch) run_tpch "1" "parquet" @@ -403,6 +426,9 @@ main() { tpch_mem10) run_tpch_mem "10" ;; + tpcds) + run_tpcds + ;; cancellation) run_cancellation ;; @@ -445,7 +471,7 @@ main() { h2o_medium_window) run_h2o_window "MEDIUM" "CSV" "window" ;; - h2o_big_window) + h2o_big_window) run_h2o_window "BIG" "CSV" "window" ;; h2o_small_parquet) @@ -494,9 +520,15 @@ main() { hj) run_hj ;; + smj) + run_smj + ;; compile_profile) run_compile_profile "${PROFILE_ARGS[@]}" ;; + clickbench_sorted) + run_clickbench_sorted + ;; *) echo "Error: unknown benchmark '$BENCHMARK' for run" usage @@ -529,7 +561,7 @@ main() { # Creates TPCH data at a certain scale factor, if it doesn't already # exist # -# call like: data_tpch($scale_factor) +# call like: data_tpch($scale_factor, format) # # Creates data in $DATA_DIR/tpch_sf1 for scale factor 1 # Creates data in $DATA_DIR/tpch_sf10 for scale factor 10 @@ -540,20 +572,23 @@ data_tpch() { echo "Internal error: Scale factor not specified" exit 1 fi + FORMAT=$2 + if [ -z "$FORMAT" ] ; then + echo "Internal error: Format not specified" + exit 1 + fi TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}" - echo "Creating tpch dataset at Scale Factor ${SCALE_FACTOR} in ${TPCH_DIR}..." + echo "Creating tpch $FORMAT dataset at Scale Factor ${SCALE_FACTOR} in ${TPCH_DIR}..." # Ensure the target data directory exists mkdir -p "${TPCH_DIR}" - # Create 'tbl' (CSV format) data into $DATA_DIR if it does not already exist - FILE="${TPCH_DIR}/supplier.tbl" - if test -f "${FILE}"; then - echo " tbl files exist ($FILE exists)." - else - echo " creating tbl files with tpch_dbgen..." - docker run -v "${TPCH_DIR}":/data -it --rm ghcr.io/scalytics/tpch-docker:main -vf -s "${SCALE_FACTOR}" + # check if tpchgen-cli is installed + if ! command -v tpchgen-cli &> /dev/null + then + echo "tpchgen-cli could not be found, please install it via 'cargo install tpchgen-cli'" + exit 1 fi # Copy expected answers into the ./data/answers directory if it does not already exist @@ -566,27 +601,52 @@ data_tpch() { docker run -v "${TPCH_DIR}":/data -it --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/" fi - # Create 'parquet' files from tbl - FILE="${TPCH_DIR}/supplier" - if test -d "${FILE}"; then - echo " parquet files exist ($FILE exists)." - else - echo " creating parquet files using benchmark binary ..." - pushd "${SCRIPT_DIR}" > /dev/null - $CARGO_COMMAND --bin tpch -- convert --input "${TPCH_DIR}" --output "${TPCH_DIR}" --format parquet - popd > /dev/null + if [ "$FORMAT" = "parquet" ]; then + # Create 'parquet' files, one directory per file + FILE="${TPCH_DIR}/supplier" + if test -d "${FILE}"; then + echo " parquet files exist ($FILE exists)." + else + echo " creating parquet files using tpchgen-cli ..." + tpchgen-cli --scale-factor "${SCALE_FACTOR}" --format parquet --parquet-compression='ZSTD(1)' --parts=1 --output-dir "${TPCH_DIR}" + fi + return fi - # Create 'csv' files from tbl - FILE="${TPCH_DIR}/csv/supplier" - if test -d "${FILE}"; then - echo " csv files exist ($FILE exists)." - else - echo " creating csv files using benchmark binary ..." 
- pushd "${SCRIPT_DIR}" > /dev/null - $CARGO_COMMAND --bin tpch -- convert --input "${TPCH_DIR}" --output "${TPCH_DIR}/csv" --format csv - popd > /dev/null + # Create 'csv' files, one directory per file + if [ "$FORMAT" = "csv" ]; then + FILE="${TPCH_DIR}/csv/supplier" + if test -d "${FILE}"; then + echo " csv files exist ($FILE exists)." + else + echo " creating csv files using tpchgen-cli binary ..." + tpchgen-cli --scale-factor "${SCALE_FACTOR}" --format csv --parts=1 --output-dir "${TPCH_DIR}/csv" + fi + return + fi + + echo "Error: unknown format '$FORMAT' for tpch data generation, expected 'parquet' or 'csv'" + exit 1 +} + +# Downloads TPC-DS data +data_tpcds() { + TPCDS_DIR="${DATA_DIR}/tpcds_sf1" + + # Check if `web_site.parquet` exists in the TPCDS data directory to verify data presence + echo "Checking TPC-DS data directory: ${TPCDS_DIR}" + if [ ! -f "${TPCDS_DIR}/web_site.parquet" ]; then + mkdir -p "${TPCDS_DIR}" + # Download the DataFusion benchmarks repository zip if it is not already downloaded + if [ ! -f "${DATA_DIR}/datafusion-benchmarks.zip" ]; then + echo "Downloading DataFusion benchmarks repository zip to: ${DATA_DIR}/datafusion-benchmarks.zip" + wget --timeout=30 --tries=3 -O "${DATA_DIR}/datafusion-benchmarks.zip" https://github.com/apache/datafusion-benchmarks/archive/refs/heads/main.zip + fi + echo "Extracting TPC-DS parquet data to ${TPCDS_DIR}..." + unzip -o -j -d "${TPCDS_DIR}" "${DATA_DIR}/datafusion-benchmarks.zip" datafusion-benchmarks-main/tpcds/data/sf1/* + echo "TPC-DS data extracted." fi + echo "Done." } # Runs the tpch benchmark @@ -603,10 +663,10 @@ run_tpch() { echo "Running tpch benchmark..." FORMAT=$2 - debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format ${FORMAT} -o "${RESULTS_FILE}" ${QUERY_ARG} + debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format ${FORMAT} -o "${RESULTS_FILE}" ${QUERY_ARG} } -# Runs the tpch in memory +# Runs the tpch in memory (needs tpch parquet data) run_tpch_mem() { SCALE_FACTOR=$1 if [ -z "$SCALE_FACTOR" ] ; then @@ -619,7 +679,27 @@ run_tpch_mem() { echo "RESULTS_FILE: ${RESULTS_FILE}" echo "Running tpch_mem benchmark..." # -m means in memory - debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG} + debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG} +} + +# Runs the tpcds benchmark +run_tpcds() { + TPCDS_DIR="${DATA_DIR}/tpcds_sf1" + + # Check if TPCDS data directory and representative file exists + if [ ! -f "${TPCDS_DIR}/web_site.parquet" ]; then + echo "" >&2 + echo "Please prepare TPC-DS data first by following instructions:" >&2 + echo " ./bench.sh data tpcds" >&2 + echo "" >&2 + exit 1 + fi + + RESULTS_FILE="${RESULTS_DIR}/tpcds_sf1.json" + echo "RESULTS_FILE: ${RESULTS_FILE}" + echo "Running tpcds benchmark..." 
+ + debug_run $CARGO_COMMAND --bin dfbench -- tpcds --iterations 5 --path "${TPCDS_DIR}" --query_path "../datafusion/core/tests/tpc-ds" --prefer_hash_join "${PREFER_HASH_JOIN}" -o "${RESULTS_FILE}" ${QUERY_ARG} } # Runs the compile profile benchmark helper @@ -1154,6 +1234,14 @@ run_hj() { debug_run $CARGO_COMMAND --bin dfbench -- hj --iterations 5 -o "${RESULTS_FILE}" ${QUERY_ARG} } +# Runs the smj benchmark +run_smj() { + RESULTS_FILE="${RESULTS_DIR}/smj.json" + echo "RESULTS_FILE: ${RESULTS_FILE}" + echo "Running smj benchmark..." + debug_run $CARGO_COMMAND --bin dfbench -- smj --iterations 5 -o "${RESULTS_FILE}" ${QUERY_ARG} +} + compare_benchmarks() { BASE_RESULTS_DIR="${SCRIPT_DIR}/results" @@ -1189,6 +1277,113 @@ compare_benchmarks() { } +# Creates sorted ClickBench data from hits.parquet (full dataset) +# The data is sorted by EventTime in ascending order +# Uses datafusion-cli to reduce dependencies +clickbench_sorted() { + SORTED_FILE="${DATA_DIR}/hits_sorted.parquet" + ORIGINAL_FILE="${DATA_DIR}/hits.parquet" + + # Default memory limit is 12GB, can be overridden with DATAFUSION_MEMORY_GB env var + MEMORY_LIMIT_GB=${DATAFUSION_MEMORY_GB:-12} + + echo "Creating sorted ClickBench dataset from hits.parquet..." + echo "Configuration:" + echo " Memory limit: ${MEMORY_LIMIT_GB}G" + echo " Row group size: 64K rows" + echo " Compression: uncompressed" + + if [ ! -f "${ORIGINAL_FILE}" ]; then + echo "hits.parquet not found. Running data_clickbench_1 first..." + data_clickbench_1 + fi + + if [ -f "${SORTED_FILE}" ]; then + echo "Sorted hits.parquet already exists at ${SORTED_FILE}" + return 0 + fi + + echo "Sorting hits.parquet by EventTime (this may take several minutes)..." + + pushd "${DATAFUSION_DIR}" > /dev/null + echo "Building datafusion-cli..." + cargo build --release --bin datafusion-cli + DATAFUSION_CLI="${DATAFUSION_DIR}/target/release/datafusion-cli" + popd > /dev/null + + + START_TIME=$(date +%s) + echo "Start time: $(date '+%Y-%m-%d %H:%M:%S')" + echo "Using datafusion-cli to create sorted parquet file..." + "${DATAFUSION_CLI}" << EOF +-- Memory and performance configuration +SET datafusion.runtime.memory_limit = '${MEMORY_LIMIT_GB}G'; +SET datafusion.execution.spill_compression = 'uncompressed'; +SET datafusion.execution.sort_spill_reservation_bytes = 10485760; -- 10MB +SET datafusion.execution.batch_size = 8192; +SET datafusion.execution.target_partitions = 1; + +-- Parquet output configuration +SET datafusion.execution.parquet.max_row_group_size = 65536; +SET datafusion.execution.parquet.compression = 'uncompressed'; + +-- Execute sort and write +COPY (SELECT * FROM '${ORIGINAL_FILE}' ORDER BY "EventTime") +TO '${SORTED_FILE}' +STORED AS PARQUET; +EOF + + local result=$? 
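The `clickbench_sorted` helper above sorts the full hits.parquet by EventTime through a datafusion-cli heredoc, with the memory pool sized by `DATAFUSION_MEMORY_GB` (default 12 GB). A minimal sketch of overriding that limit, using the wrapper spelling printed by the function's own failure tip and assuming the default `./data` layout:

```bash
cd benchmarks

# Give the sort a 16 GB memory pool instead of the 12 GB default
DATAFUSION_MEMORY_GB=16 ./bench.sh data clickbench_sorted

# The sorted copy is written next to the original file
ls -lh data/hits.parquet data/hits_sorted.parquet
```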
+ + END_TIME=$(date +%s) + DURATION=$((END_TIME - START_TIME)) + echo "End time: $(date '+%Y-%m-%d %H:%M:%S')" + + if [ $result -eq 0 ]; then + echo "✓ Successfully created sorted ClickBench dataset" + + INPUT_SIZE=$(stat -f%z "${ORIGINAL_FILE}" 2>/dev/null || stat -c%s "${ORIGINAL_FILE}" 2>/dev/null) + OUTPUT_SIZE=$(stat -f%z "${SORTED_FILE}" 2>/dev/null || stat -c%s "${SORTED_FILE}" 2>/dev/null) + INPUT_MB=$((INPUT_SIZE / 1024 / 1024)) + OUTPUT_MB=$((OUTPUT_SIZE / 1024 / 1024)) + + echo " Input: ${INPUT_MB} MB" + echo " Output: ${OUTPUT_MB} MB" + + echo "" + echo "Time Statistics:" + echo " Total duration: ${DURATION} seconds ($(printf '%02d:%02d:%02d' $((DURATION/3600)) $((DURATION%3600/60)) $((DURATION%60))))" + echo " Throughput: $((INPUT_MB / DURATION)) MB/s" + + return 0 + else + echo "✗ Error: Failed to create sorted dataset" + echo "💡 Tip: Try increasing memory with: DATAFUSION_MEMORY_GB=16 ./bench.sh data clickbench_sorted" + return 1 + fi +} + +# Runs the sorted data benchmark with prefer_existing_sort configuration +run_clickbench_sorted() { + RESULTS_FILE="${RESULTS_DIR}/clickbench_sorted.json" + echo "RESULTS_FILE: ${RESULTS_FILE}" + echo "Running sorted data benchmark with prefer_existing_sort optimization..." + + # Ensure sorted data exists + clickbench_sorted + + # Run benchmark with prefer_existing_sort configuration + # This allows DataFusion to optimize away redundant sorts while maintaining parallelism + debug_run $CARGO_COMMAND --bin dfbench -- clickbench \ + --iterations 5 \ + --path "${DATA_DIR}/hits_sorted.parquet" \ + --queries-path "${SCRIPT_DIR}/queries/clickbench/queries/sorted_data" \ + --sorted-by "EventTime" \ + -c datafusion.optimizer.prefer_existing_sort=true \ + -o "${RESULTS_FILE}" \ + ${QUERY_ARG} +} + setup_venv() { python3 -m venv "$VIRTUAL_ENV" PATH=$VIRTUAL_ENV/bin:$PATH python3 -m pip install -r requirements.txt diff --git a/benchmarks/compare_tpcds.sh b/benchmarks/compare_tpcds.sh new file mode 100755 index 0000000000000..48331a7c7510e --- /dev/null +++ b/benchmarks/compare_tpcds.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
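`run_clickbench_sorted` above regenerates the sorted file if needed, then runs dfbench with `--sorted-by EventTime` and `-c datafusion.optimizer.prefer_existing_sort=true`. A hedged sketch of driving it through bench.sh; the trailing query argument assumes the usual QUERY_ARG forwarding used by the other runners.

```bash
cd benchmarks

# Run the sorted-data ClickBench queries; RESULTS_FILE is clickbench_sorted.json
./bench.sh run clickbench_sorted

# Optionally restrict to a single query, e.g. q0
./bench.sh run clickbench_sorted 0
```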
+ +# Compare TPC-DS benchmarks between two branches + +set -e + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +usage() { + echo "Usage: $0 " + echo "" + echo "Example: $0 main dev2" + echo "" + echo "Note: TPC-DS benchmarks are not currently implemented in bench.sh" + exit 1 +} + +BRANCH1=${1:-""} +BRANCH2=${2:-""} + +if [ -z "$BRANCH1" ] || [ -z "$BRANCH2" ]; then + usage +fi + +# Store current branch +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) + +echo "Comparing TPC-DS benchmarks: ${BRANCH1} vs ${BRANCH2}" + +# Run benchmark on first branch +git checkout "$BRANCH1" +./benchmarks/bench.sh run tpcds + +# Run benchmark on second branch +git checkout "$BRANCH2" +./benchmarks/bench.sh run tpcds + +# Compare results +./benchmarks/bench.sh compare "$BRANCH1" "$BRANCH2" + +# Return to original branch +git checkout "$CURRENT_BRANCH" \ No newline at end of file diff --git a/benchmarks/compare_tpch.sh b/benchmarks/compare_tpch.sh new file mode 100755 index 0000000000000..85e8da29ce41d --- /dev/null +++ b/benchmarks/compare_tpch.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Compare TPC-H benchmarks between two branches + +set -e + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +usage() { + echo "Usage: $0 " + echo "" + echo "Example: $0 main dev2" + exit 1 +} + +BRANCH1=${1:-""} +BRANCH2=${2:-""} + +if [ -z "$BRANCH1" ] || [ -z "$BRANCH2" ]; then + usage +fi + +# Store current branch +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) + +echo "Comparing TPC-H benchmarks: ${BRANCH1} vs ${BRANCH2}" + +# Run benchmark on first branch +git checkout "$BRANCH1" +./benchmarks/bench.sh run tpch + +# Run benchmark on second branch +git checkout "$BRANCH2" +./benchmarks/bench.sh run tpch + +# Compare results +./benchmarks/bench.sh compare "$BRANCH1" "$BRANCH2" + +# Return to original branch +git checkout "$CURRENT_BRANCH" \ No newline at end of file diff --git a/benchmarks/queries/clickbench/queries/sorted_data/q0.sql b/benchmarks/queries/clickbench/queries/sorted_data/q0.sql new file mode 100644 index 0000000000000..1170a383bcb22 --- /dev/null +++ b/benchmarks/queries/clickbench/queries/sorted_data/q0.sql @@ -0,0 +1,3 @@ +-- Must set for ClickBench hits_partitioned dataset. 
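Both comparison scripts above follow the same pattern: check out each branch, run the matching `bench.sh run ...`, then call `bench.sh compare`. A hedged example from a clean working tree, using the placeholder branch names from the scripts' own usage text:

```bash
# TPC-H comparison between main and a feature branch
./benchmarks/compare_tpch.sh main dev2

# TPC-DS comparison takes the same two-argument form
./benchmarks/compare_tpcds.sh main dev2
```

Because the scripts run `git checkout`, any uncommitted changes should be stashed first; both scripts return to the original branch when they finish.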
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true +SELECT * FROM hits ORDER BY "EventTime" DESC limit 10; diff --git a/benchmarks/src/bin/dfbench.rs b/benchmarks/src/bin/dfbench.rs index 816cae0e38555..d842d306c1f65 100644 --- a/benchmarks/src/bin/dfbench.rs +++ b/benchmarks/src/bin/dfbench.rs @@ -34,7 +34,7 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; use datafusion_benchmarks::{ - cancellation, clickbench, h2o, hj, imdb, nlj, sort_tpch, tpch, + cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_tpch, tpcds, tpch, }; #[derive(Debug, StructOpt)] @@ -46,9 +46,10 @@ enum Options { HJ(hj::RunOpt), Imdb(imdb::RunOpt), Nlj(nlj::RunOpt), + Smj(smj::RunOpt), SortTpch(sort_tpch::RunOpt), Tpch(tpch::RunOpt), - TpchConvert(tpch::ConvertOpt), + Tpcds(tpcds::RunOpt), } // Main benchmark runner entrypoint @@ -63,8 +64,9 @@ pub async fn main() -> Result<()> { Options::HJ(opt) => opt.run().await, Options::Imdb(opt) => Box::pin(opt.run()).await, Options::Nlj(opt) => opt.run().await, + Options::Smj(opt) => opt.run().await, Options::SortTpch(opt) => opt.run().await, Options::Tpch(opt) => Box::pin(opt.run()).await, - Options::TpchConvert(opt) => opt.run().await, + Options::Tpcds(opt) => Box::pin(opt.run()).await, } } diff --git a/benchmarks/src/bin/external_aggr.rs b/benchmarks/src/bin/external_aggr.rs index 46b6cc9a80b24..2bc2bd4458a53 100644 --- a/benchmarks/src/bin/external_aggr.rs +++ b/benchmarks/src/bin/external_aggr.rs @@ -33,17 +33,17 @@ use datafusion::datasource::listing::{ }; use datafusion::datasource::{MemTable, TableProvider}; use datafusion::error::Result; +use datafusion::execution::SessionStateBuilder; use datafusion::execution::memory_pool::FairSpillPool; -use datafusion::execution::memory_pool::{human_readable_size, units}; use datafusion::execution::runtime_env::RuntimeEnvBuilder; -use datafusion::execution::SessionStateBuilder; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::{collect, displayable}; use datafusion::prelude::*; use datafusion_benchmarks::util::{BenchmarkRun, CommonOpt, QueryResult}; use datafusion_common::instant::Instant; use datafusion_common::utils::get_available_parallelism; -use datafusion_common::{exec_err, DEFAULT_PARQUET_EXTENSION}; +use datafusion_common::{DEFAULT_PARQUET_EXTENSION, exec_err}; +use datafusion_common::{human_readable_size, units}; #[derive(Debug, StructOpt)] #[structopt( diff --git a/benchmarks/src/bin/mem_profile.rs b/benchmarks/src/bin/mem_profile.rs index 16fc3871bec86..025efefe062e1 100644 --- a/benchmarks/src/bin/mem_profile.rs +++ b/benchmarks/src/bin/mem_profile.rs @@ -199,21 +199,18 @@ fn run_query(args: &[String], results: &mut Vec) -> Result<()> { // Look for lines that contain execution time / memory stats while let Some(line) = iter.next() { - if let Some((query, duration_ms)) = parse_query_time(line) { - if let Some(next_line) = iter.peek() { - if let Some((peak_rss, peak_commit, page_faults)) = - parse_vm_line(next_line) - { - results.push(QueryResult { - query, - duration_ms, - peak_rss, - peak_commit, - page_faults, - }); - break; - } - } + if let Some((query, duration_ms)) = parse_query_time(line) + && let Some(next_line) = iter.peek() + && let Some((peak_rss, peak_commit, page_faults)) = parse_vm_line(next_line) + { + results.push(QueryResult { + query, + duration_ms, + peak_rss, + peak_commit, + page_faults, + }); + break; } } diff --git 
a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs deleted file mode 100644 index ca2bb8e57c0ec..0000000000000 --- a/benchmarks/src/bin/tpch.rs +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! tpch binary only entrypoint - -use datafusion::error::Result; -use datafusion_benchmarks::tpch; -use structopt::StructOpt; - -#[cfg(all(feature = "snmalloc", feature = "mimalloc"))] -compile_error!( - "feature \"snmalloc\" and feature \"mimalloc\" cannot be enabled at the same time" -); - -#[cfg(feature = "snmalloc")] -#[global_allocator] -static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; - -#[cfg(feature = "mimalloc")] -#[global_allocator] -static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; - -#[derive(Debug, StructOpt)] -#[structopt(about = "benchmark command")] -enum BenchmarkSubCommandOpt { - #[structopt(name = "datafusion")] - DataFusionBenchmark(tpch::RunOpt), -} - -#[derive(Debug, StructOpt)] -#[structopt(name = "TPC-H", about = "TPC-H Benchmarks.")] -enum TpchOpt { - Benchmark(BenchmarkSubCommandOpt), - Convert(tpch::ConvertOpt), -} - -/// 'tpch' entry point, with tortured command line arguments. Please -/// use `dbbench` instead. 
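The standalone `tpch` binary deleted above is superseded by the unified dfbench entrypoint, and its `convert` subcommand is replaced by tpchgen-cli. A hedged before/after sketch of the run invocation, mirroring the `run_tpch` change in bench.sh:

```bash
# Old (removed): cargo run --release --bin tpch -- benchmark datafusion ...
# New:
cd benchmarks
cargo run --release --bin dfbench -- tpch \
  --iterations 5 \
  --path ./data/tpch_sf1 \
  --format parquet \
  --prefer_hash_join true \
  -o results/tpch_sf1.json
```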
-/// -/// Note: this is kept to be backwards compatible with the benchmark names prior to -/// -#[tokio::main] -async fn main() -> Result<()> { - env_logger::init(); - match TpchOpt::from_args() { - TpchOpt::Benchmark(BenchmarkSubCommandOpt::DataFusionBenchmark(opt)) => { - Box::pin(opt.run()).await - } - TpchOpt::Convert(opt) => opt.run().await, - } -} diff --git a/benchmarks/src/cancellation.rs b/benchmarks/src/cancellation.rs index fcf03fbc54550..1b4c04b409ccd 100644 --- a/benchmarks/src/cancellation.rs +++ b/benchmarks/src/cancellation.rs @@ -25,22 +25,22 @@ use arrow::array::Array; use arrow::datatypes::DataType; use arrow::record_batch::RecordBatch; use datafusion::common::{Result, ScalarValue}; -use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::{ListingOptions, ListingTableUrl}; -use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::execution::TaskContext; -use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::prelude::*; use datafusion_common::instant::Instant; use futures::TryStreamExt; use object_store::ObjectStore; -use parquet::arrow::async_writer::ParquetObjectWriter; use parquet::arrow::AsyncArrowWriter; +use parquet::arrow::async_writer::ParquetObjectWriter; +use rand::Rng; use rand::distr::Alphanumeric; use rand::rngs::ThreadRng; -use rand::Rng; use structopt::StructOpt; use tokio::runtime::Runtime; use tokio_util::sync::CancellationToken; diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index a550503390c54..9036e7d9501ec 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -19,7 +19,7 @@ use std::fs; use std::io::ErrorKind; use std::path::{Path, PathBuf}; -use crate::util::{print_memory_stats, BenchmarkRun, CommonOpt, QueryResult}; +use crate::util::{BenchmarkRun, CommonOpt, QueryResult, print_memory_stats}; use datafusion::logical_expr::{ExplainFormat, ExplainOption}; use datafusion::{ error::{DataFusionError, Result}, @@ -78,6 +78,27 @@ pub struct RunOpt { /// If present, write results json here #[structopt(parse(from_os_str), short = "o", long = "output")] output_path: Option, + + /// Column name that the data is sorted by (e.g., "EventTime") + /// If specified, DataFusion will be informed that the data has this sort order + /// using CREATE EXTERNAL TABLE with WITH ORDER clause. + /// + /// Recommended to use with: -c datafusion.optimizer.prefer_existing_sort=true + /// This allows DataFusion to optimize away redundant sorts while maintaining + /// multi-core parallelism for other operations. + #[structopt(long = "sorted-by")] + sorted_by: Option, + + /// Sort order: ASC or DESC (default: ASC) + #[structopt(long = "sort-order", default_value = "ASC")] + sort_order: String, + + /// Configuration options in the format key=value + /// Can be specified multiple times. 
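A hedged invocation sketch for the new clickbench flags above: `--sorted-by` registers `hits` with a declared sort order, and `-c key=value` (repeatable, split on the first `=`) applies arbitrary session config settings. Flag spellings follow the struct definition and `run_clickbench_sorted`; the second config key and the paths are illustrative.

```bash
cd benchmarks
cargo run --release --bin dfbench -- clickbench \
  --path data/hits_sorted.parquet \
  --queries-path queries/clickbench/queries/sorted_data \
  --sorted-by EventTime \
  -c datafusion.optimizer.prefer_existing_sort=true \
  -c datafusion.execution.target_partitions=8 \
  -o results/clickbench_sorted.json
```

The sort order defaults to ASC, which matches how hits_sorted.parquet is produced; `--sort-order DESC` would only be needed for data sorted the other way.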
+ /// + /// Example: -c datafusion.optimizer.prefer_existing_sort=true + #[structopt(short = "c", long = "config")] + config_options: Vec, } /// Get the SQL file path @@ -125,6 +146,39 @@ impl RunOpt { // configure parquet options let mut config = self.common.config()?; + + if self.sorted_by.is_some() { + println!("ℹ️ Data is registered with sort order"); + + let has_prefer_sort = self + .config_options + .iter() + .any(|opt| opt.contains("prefer_existing_sort=true")); + + if !has_prefer_sort { + println!( + "ℹ️ Consider using -c datafusion.optimizer.prefer_existing_sort=true" + ); + println!("ℹ️ to optimize queries while maintaining parallelism"); + } + } + + // Apply user-provided configuration options + for config_opt in &self.config_options { + let parts: Vec<&str> = config_opt.splitn(2, '=').collect(); + if parts.len() != 2 { + return Err(exec_datafusion_err!( + "Invalid config option format: '{}'. Expected 'key=value'", + config_opt + )); + } + let key = parts[0]; + let value = parts[1]; + + println!("Setting config: {key} = {value}"); + config = config.set_str(key, value); + } + { let parquet_options = &mut config.options_mut().execution.parquet; // The hits_partitioned dataset specifies string columns @@ -136,10 +190,18 @@ impl RunOpt { parquet_options.pushdown_filters = true; parquet_options.reorder_filters = true; } + + if self.sorted_by.is_some() { + // We should compare the dynamic topk optimization when data is sorted, so we make the + // assumption that filter pushdown is also enabled in this case. + parquet_options.pushdown_filters = true; + parquet_options.reorder_filters = true; + } } let rt_builder = self.common.runtime_env_builder()?; let ctx = SessionContext::new_with_config_rt(config, rt_builder.build_arc()?); + self.register_hits(&ctx).await?; let mut benchmark_run = BenchmarkRun::new(); @@ -214,17 +276,54 @@ impl RunOpt { } /// Registers the `hits.parquet` as a table named `hits` + /// If sorted_by is specified, uses CREATE EXTERNAL TABLE with WITH ORDER async fn register_hits(&self, ctx: &SessionContext) -> Result<()> { - let options = Default::default(); let path = self.path.as_os_str().to_str().unwrap(); - ctx.register_parquet("hits", path, options) - .await - .map_err(|e| { - DataFusionError::Context( - format!("Registering 'hits' as {path}"), - Box::new(e), - ) - }) + + // If sorted_by is specified, use CREATE EXTERNAL TABLE with WITH ORDER + if let Some(ref sort_column) = self.sorted_by { + println!( + "Registering table with sort order: {} {}", + sort_column, self.sort_order + ); + + // Escape column name with double quotes + let escaped_column = if sort_column.contains('"') { + sort_column.clone() + } else { + format!("\"{sort_column}\"") + }; + + // Build CREATE EXTERNAL TABLE DDL with WITH ORDER clause + // Schema will be automatically inferred from the Parquet file + let create_table_sql = format!( + "CREATE EXTERNAL TABLE hits \ + STORED AS PARQUET \ + LOCATION '{}' \ + WITH ORDER ({} {})", + path, + escaped_column, + self.sort_order.to_uppercase() + ); + + println!("Executing: {create_table_sql}"); + + // Execute the CREATE EXTERNAL TABLE statement + ctx.sql(&create_table_sql).await?.collect().await?; + + Ok(()) + } else { + // Original registration without sort order + let options = Default::default(); + ctx.register_parquet("hits", path, options) + .await + .map_err(|e| { + DataFusionError::Context( + format!("Registering 'hits' as {path}"), + Box::new(e), + ) + }) + } } fn iterations(&self) -> usize { diff --git a/benchmarks/src/h2o.rs 
b/benchmarks/src/h2o.rs index be74252031194..07a40447d4149 100644 --- a/benchmarks/src/h2o.rs +++ b/benchmarks/src/h2o.rs @@ -20,11 +20,11 @@ //! - [H2O AI Benchmark](https://duckdb.org/2023/04/14/h2oai.html) //! - [Extended window function benchmark](https://duckdb.org/2024/06/26/benchmarks-over-time.html#window-functions-benchmark) -use crate::util::{print_memory_stats, BenchmarkRun, CommonOpt}; +use crate::util::{BenchmarkRun, CommonOpt, print_memory_stats}; use datafusion::logical_expr::{ExplainFormat, ExplainOption}; use datafusion::{error::Result, prelude::SessionContext}; use datafusion_common::{ - exec_datafusion_err, instant::Instant, internal_err, DataFusionError, TableReference, + DataFusionError, TableReference, exec_datafusion_err, instant::Instant, internal_err, }; use std::path::{Path, PathBuf}; use structopt::StructOpt; diff --git a/benchmarks/src/hj.rs b/benchmarks/src/hj.rs index 505b322745485..562047f615bc8 100644 --- a/benchmarks/src/hj.rs +++ b/benchmarks/src/hj.rs @@ -19,7 +19,7 @@ use crate::util::{BenchmarkRun, CommonOpt, QueryResult}; use datafusion::physical_plan::execute_stream; use datafusion::{error::Result, prelude::SessionContext}; use datafusion_common::instant::Instant; -use datafusion_common::{exec_datafusion_err, exec_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_datafusion_err, exec_err}; use structopt::StructOpt; use futures::StreamExt; diff --git a/benchmarks/src/imdb/convert.rs b/benchmarks/src/imdb/convert.rs index e7949aa715c23..2c4e1270255bb 100644 --- a/benchmarks/src/imdb/convert.rs +++ b/benchmarks/src/imdb/convert.rs @@ -26,8 +26,8 @@ use structopt::StructOpt; use datafusion::common::not_impl_err; -use super::get_imdb_table_schema; use super::IMDB_TABLES; +use super::get_imdb_table_schema; #[derive(Debug, StructOpt)] pub struct ConvertOpt { diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index 11bd424ba6866..05f1870c5d45a 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -19,16 +19,16 @@ use std::path::PathBuf; use std::sync::Arc; use super::{ - get_imdb_table_schema, get_query_sql, IMDB_QUERY_END_ID, IMDB_QUERY_START_ID, - IMDB_TABLES, + IMDB_QUERY_END_ID, IMDB_QUERY_START_ID, IMDB_TABLES, get_imdb_table_schema, + get_query_sql, }; -use crate::util::{print_memory_stats, BenchmarkRun, CommonOpt, QueryResult}; +use crate::util::{BenchmarkRun, CommonOpt, QueryResult, print_memory_stats}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::{self, pretty_format_batches}; +use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::file_format::csv::CsvFormat; use datafusion::datasource::file_format::parquet::ParquetFormat; -use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs index 07cffa5ae468e..a3bc221840ada 100644 --- a/benchmarks/src/lib.rs +++ b/benchmarks/src/lib.rs @@ -22,6 +22,8 @@ pub mod h2o; pub mod hj; pub mod imdb; pub mod nlj; +pub mod smj; pub mod sort_tpch; +pub mod tpcds; pub mod tpch; pub mod util; diff --git a/benchmarks/src/nlj.rs b/benchmarks/src/nlj.rs index 7d1e14f69439c..cbf5a03fbf93d 100644 --- a/benchmarks/src/nlj.rs +++ b/benchmarks/src/nlj.rs @@ -19,7 +19,7 @@ use crate::util::{BenchmarkRun, CommonOpt, QueryResult}; use datafusion::physical_plan::execute_stream; use datafusion::{error::Result, prelude::SessionContext}; use 
datafusion_common::instant::Instant; -use datafusion_common::{exec_datafusion_err, exec_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_datafusion_err, exec_err}; use structopt::StructOpt; use futures::StreamExt; @@ -268,8 +268,8 @@ impl RunOpt { let elapsed = start.elapsed(); println!( - "Query {query_name} iteration {i} returned {row_count} rows in {elapsed:?}" - ); + "Query {query_name} iteration {i} returned {row_count} rows in {elapsed:?}" + ); query_results.push(QueryResult { elapsed, row_count }); } diff --git a/benchmarks/src/smj.rs b/benchmarks/src/smj.rs new file mode 100644 index 0000000000000..53902e09302c2 --- /dev/null +++ b/benchmarks/src/smj.rs @@ -0,0 +1,524 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::util::{BenchmarkRun, CommonOpt, QueryResult}; +use datafusion::physical_plan::execute_stream; +use datafusion::{error::Result, prelude::SessionContext}; +use datafusion_common::instant::Instant; +use datafusion_common::{DataFusionError, exec_datafusion_err, exec_err}; +use structopt::StructOpt; + +use futures::StreamExt; + +/// Run the Sort Merge Join (SMJ) benchmark +/// +/// This micro-benchmark focuses on the performance characteristics of SMJs. +/// +/// It uses equality join predicates (to ensure SMJ is selected) and varies: +/// - Join type: Inner/Left/Right/Full/LeftSemi/LeftAnti/RightSemi/RightAnti +/// - Key cardinality: 1:1, 1:N, N:M relationships +/// - Filter selectivity: Low (1%), Medium (10%), High (50%) +/// - Input sizes: Small to large, balanced and skewed +/// +/// All inputs are pre-sorted in CTEs before the join to isolate join +/// performance from sort overhead. +#[derive(Debug, StructOpt, Clone)] +#[structopt(verbatim_doc_comment)] +pub struct RunOpt { + /// Query number (between 1 and 20). 
If not specified, runs all queries + #[structopt(short, long)] + query: Option, + + /// Common options + #[structopt(flatten)] + common: CommonOpt, + + /// If present, write results json here + #[structopt(parse(from_os_str), short = "o", long = "output")] + output_path: Option, +} + +/// Inline SQL queries for SMJ benchmarks +/// +/// Each query's comment includes: +/// - Join type +/// - Left row count × Right row count +/// - Key cardinality (rows per key) +/// - Filter selectivity (if applicable) +const SMJ_QUERIES: &[&str] = &[ + // Q1: INNER 100K x 100K | 1:1 + r#" + WITH t1_sorted AS ( + SELECT value as key FROM range(100000) ORDER BY value + ), + t2_sorted AS ( + SELECT value as key FROM range(100000) ORDER BY value + ) + SELECT t1_sorted.key as k1, t2_sorted.key as k2 + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + "#, + // Q2: INNER 100K x 1M | 1:10 + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + "#, + // Q3: INNER 1M x 1M | 1:100 + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + "#, + // Q4: INNER 100K x 1M | 1:10 | 1% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + WHERE t2_sorted.data % 100 = 0 + "#, + // Q5: INNER 1M x 1M | 1:100 | 10% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + WHERE t1_sorted.data <> t2_sorted.data AND t2_sorted.data % 10 = 0 + "#, + // Q6: LEFT 100K x 1M | 1:10 + r#" + WITH t1_sorted AS ( + SELECT value % 10500 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted LEFT JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + "#, + // Q7: LEFT 100K x 1M | 1:10 | 50% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted LEFT JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + WHERE t2_sorted.data IS NULL OR t2_sorted.data % 2 = 0 + "#, + // Q8: FULL 100K x 100K | 1:10 + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM 
range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 12500 as key, value as data + FROM range(100000) + ORDER BY key, data + ) + SELECT t1_sorted.key as k1, t1_sorted.data as d1, + t2_sorted.key as k2, t2_sorted.data as d2 + FROM t1_sorted FULL JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + "#, + // Q9: FULL 100K x 1M | 1:10 | 10% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key as k1, t1_sorted.data as d1, + t2_sorted.key as k2, t2_sorted.data as d2 + FROM t1_sorted FULL JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + WHERE (t1_sorted.data IS NULL OR t2_sorted.data IS NULL + OR t1_sorted.data <> t2_sorted.data) + AND (t1_sorted.data IS NULL OR t1_sorted.data % 10 = 0) + "#, + // Q10: LEFT SEMI 100K x 1M | 1:10 + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key + FROM range(1000000) + ORDER BY key + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + ) + "#, + // Q11: LEFT SEMI 100K x 1M | 1:10 | 1% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + AND t2_sorted.data <> t1_sorted.data + AND t2_sorted.data % 100 = 0 + ) + "#, + // Q12: LEFT SEMI 100K x 1M | 1:10 | 50% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + AND t2_sorted.data <> t1_sorted.data + AND t2_sorted.data % 2 = 0 + ) + "#, + // Q13: LEFT SEMI 100K x 1M | 1:10 | 90% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + AND t2_sorted.data % 10 <> 0 + ) + "#, + // Q14: LEFT ANTI 100K x 1M | 1:10 + r#" + WITH t1_sorted AS ( + SELECT value % 10500 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key + FROM range(1000000) + ORDER BY key + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE NOT EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + ) + "#, + // Q15: LEFT ANTI 100K x 1M | 1:10 | partial match + r#" + WITH t1_sorted AS ( + SELECT value % 12000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key + FROM range(1000000) + ORDER BY key + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE NOT EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + ) + "#, + // Q16: LEFT 
ANTI 100K x 100K | 1:1 | stress + r#" + WITH t1_sorted AS ( + SELECT value % 11000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key + FROM range(100000) + ORDER BY key + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE NOT EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + ) + "#, + // Q17: INNER 100K x 5M | 1:50 | 5% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(5000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2 + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + WHERE t2_sorted.data <> t1_sorted.data AND t2_sorted.data % 20 = 0 + "#, + // Q18: LEFT SEMI 100K x 5M | 1:50 | 2% + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(5000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + AND t2_sorted.data <> t1_sorted.data + AND t2_sorted.data % 50 = 0 + ) + "#, + // Q19: LEFT ANTI 100K x 5M | 1:50 | partial match + r#" + WITH t1_sorted AS ( + SELECT value % 15000 as key, value as data + FROM range(100000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key + FROM range(5000000) + ORDER BY key + ) + SELECT t1_sorted.key, t1_sorted.data + FROM t1_sorted + WHERE NOT EXISTS ( + SELECT 1 FROM t2_sorted + WHERE t2_sorted.key = t1_sorted.key + ) + "#, + // Q20: INNER 1M x 10M | 1:100 + GROUP BY + r#" + WITH t1_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(1000000) + ORDER BY key, data + ), + t2_sorted AS ( + SELECT value % 10000 as key, value as data + FROM range(10000000) + ORDER BY key, data + ) + SELECT t1_sorted.key, count(*) as cnt + FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key + GROUP BY t1_sorted.key + "#, +]; + +impl RunOpt { + pub async fn run(self) -> Result<()> { + println!("Running SMJ benchmarks with the following options: {self:#?}\n"); + + // Define query range + let query_range = match self.query { + Some(query_id) => { + if query_id >= 1 && query_id <= SMJ_QUERIES.len() { + query_id..=query_id + } else { + return exec_err!( + "Query {query_id} not found. 
Available queries: 1 to {}", + SMJ_QUERIES.len() + ); + } + } + None => 1..=SMJ_QUERIES.len(), + }; + + let mut config = self.common.config()?; + // Disable hash joins to force SMJ + config = config.set_bool("datafusion.optimizer.prefer_hash_join", false); + let rt_builder = self.common.runtime_env_builder()?; + let ctx = SessionContext::new_with_config_rt(config, rt_builder.build_arc()?); + + let mut benchmark_run = BenchmarkRun::new(); + for query_id in query_range { + let query_index = query_id - 1; // Convert 1-based to 0-based index + + let sql = SMJ_QUERIES[query_index]; + benchmark_run.start_new_case(&format!("Query {query_id}")); + let query_run = self.benchmark_query(sql, &query_id.to_string(), &ctx).await; + match query_run { + Ok(query_results) => { + for iter in query_results { + benchmark_run.write_iter(iter.elapsed, iter.row_count); + } + } + Err(e) => { + return Err(DataFusionError::Context( + format!("SMJ benchmark Q{query_id} failed with error:"), + Box::new(e), + )); + } + } + } + + benchmark_run.maybe_write_json(self.output_path.as_ref())?; + Ok(()) + } + + async fn benchmark_query( + &self, + sql: &str, + query_name: &str, + ctx: &SessionContext, + ) -> Result> { + let mut query_results = vec![]; + + // Validate that the query plan includes a Sort Merge Join + let df = ctx.sql(sql).await?; + let physical_plan = df.create_physical_plan().await?; + let plan_string = format!("{physical_plan:#?}"); + + if !plan_string.contains("SortMergeJoinExec") { + return Err(exec_datafusion_err!( + "Query {query_name} does not use Sort Merge Join. Physical plan: {plan_string}" + )); + } + + for i in 0..self.common.iterations { + let start = Instant::now(); + + let row_count = Self::execute_sql_without_result_buffering(sql, ctx).await?; + + let elapsed = start.elapsed(); + + println!( + "Query {query_name} iteration {i} returned {row_count} rows in {elapsed:?}" + ); + + query_results.push(QueryResult { elapsed, row_count }); + } + + Ok(query_results) + } + + /// Executes the SQL query and drops each result batch after evaluation, to + /// minimizes memory usage by not buffering results. 
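Because `benchmark_query` above rejects any query whose physical plan does not contain `SortMergeJoinExec`, the same check can be reproduced interactively. A hedged sketch with datafusion-cli: the CTE mirrors Q1 and `prefer_hash_join` is disabled the same way the benchmark does.

```bash
datafusion-cli <<'EOF' | grep -i SortMergeJoin
SET datafusion.optimizer.prefer_hash_join = false;
EXPLAIN
WITH t1_sorted AS (SELECT value as key FROM range(100000) ORDER BY value),
     t2_sorted AS (SELECT value as key FROM range(100000) ORDER BY value)
SELECT t1_sorted.key as k1, t2_sorted.key as k2
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key;
EOF
```

If grep prints nothing, the planner chose a different join operator and the benchmark would refuse that query.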
+ /// + /// Returns the total result row count + async fn execute_sql_without_result_buffering( + sql: &str, + ctx: &SessionContext, + ) -> Result { + let mut row_count = 0; + + let df = ctx.sql(sql).await?; + let physical_plan = df.create_physical_plan().await?; + let mut stream = execute_stream(physical_plan, ctx.task_ctx())?; + + while let Some(batch) = stream.next().await { + row_count += batch?.num_rows(); + + // Evaluate the result and do nothing, the result will be dropped + // to reduce memory pressure + } + + Ok(row_count) + } +} diff --git a/benchmarks/src/sort_tpch.rs b/benchmarks/src/sort_tpch.rs index 09b5a676bbff1..2f3be76f050b9 100644 --- a/benchmarks/src/sort_tpch.rs +++ b/benchmarks/src/sort_tpch.rs @@ -36,11 +36,11 @@ use datafusion::execution::SessionStateBuilder; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::{displayable, execute_stream}; use datafusion::prelude::*; +use datafusion_common::DEFAULT_PARQUET_EXTENSION; use datafusion_common::instant::Instant; use datafusion_common::utils::get_available_parallelism; -use datafusion_common::DEFAULT_PARQUET_EXTENSION; -use crate::util::{print_memory_stats, BenchmarkRun, CommonOpt, QueryResult}; +use crate::util::{BenchmarkRun, CommonOpt, QueryResult, print_memory_stats}; #[derive(Debug, StructOpt)] pub struct RunOpt { diff --git a/datafusion/core/tests/schema_adapter/mod.rs b/benchmarks/src/tpcds/mod.rs similarity index 95% rename from datafusion/core/tests/schema_adapter/mod.rs rename to benchmarks/src/tpcds/mod.rs index 2f81a43f4736e..4829eb9fd348a 100644 --- a/datafusion/core/tests/schema_adapter/mod.rs +++ b/benchmarks/src/tpcds/mod.rs @@ -15,4 +15,5 @@ // specific language governing permissions and limitations // under the License. -mod schema_adapter_integration_tests; +mod run; +pub use run::RunOpt; diff --git a/benchmarks/src/tpcds/run.rs b/benchmarks/src/tpcds/run.rs new file mode 100644 index 0000000000000..3f579024ba519 --- /dev/null +++ b/benchmarks/src/tpcds/run.rs @@ -0,0 +1,356 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
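The new `tpcds` module wired into lib.rs above is exposed as a dfbench subcommand. A hedged direct invocation restricted to one query, with flag names taken from `run_tpcds` in bench.sh and the RunOpt definition that follows; it assumes the query files exist under datafusion/core/tests/tpc-ds.

```bash
cd benchmarks
cargo run --release --bin dfbench -- tpcds \
  --path ./data/tpcds_sf1 \
  --query_path ../datafusion/core/tests/tpc-ds \
  --query 3 \
  --iterations 3 \
  -o results/tpcds_q3.json
```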
+ +use std::fs; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::util::{BenchmarkRun, CommonOpt, QueryResult, print_memory_stats}; + +use arrow::record_batch::RecordBatch; +use arrow::util::pretty::{self, pretty_format_batches}; +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::listing::{ + ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, +}; +use datafusion::datasource::{MemTable, TableProvider}; +use datafusion::error::Result; +use datafusion::physical_plan::display::DisplayableExecutionPlan; +use datafusion::physical_plan::{collect, displayable}; +use datafusion::prelude::*; +use datafusion_common::instant::Instant; +use datafusion_common::utils::get_available_parallelism; +use datafusion_common::{DEFAULT_PARQUET_EXTENSION, plan_err}; + +use log::info; +use structopt::StructOpt; + +// hack to avoid `default_value is meaningless for bool` errors +type BoolDefaultTrue = bool; +pub const TPCDS_QUERY_START_ID: usize = 1; +pub const TPCDS_QUERY_END_ID: usize = 99; + +pub const TPCDS_TABLES: &[&str] = &[ + "call_center", + "customer_address", + "household_demographics", + "promotion", + "store_sales", + "web_page", + "catalog_page", + "customer_demographics", + "income_band", + "reason", + "store", + "web_returns", + "catalog_returns", + "customer", + "inventory", + "ship_mode", + "time_dim", + "web_sales", + "catalog_sales", + "date_dim", + "item", + "store_returns", + "warehouse", + "web_site", +]; + +/// Get the SQL statements from the specified query file +pub fn get_query_sql(base_query_path: &str, query: usize) -> Result> { + if query > 0 && query < 100 { + let filename = format!("{base_query_path}/{query}.sql"); + let mut errors = vec![]; + match fs::read_to_string(&filename) { + Ok(contents) => { + return Ok(contents + .split(';') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .collect()); + } + Err(e) => errors.push(format!("{filename}: {e}")), + }; + + plan_err!("invalid query. Could not find query: {:?}", errors) + } else { + plan_err!("invalid query. Expected value between 1 and 99") + } +} + +/// Run the tpcds benchmark. +#[derive(Debug, StructOpt, Clone)] +#[structopt(verbatim_doc_comment)] +pub struct RunOpt { + /// Query number. If not specified, runs all queries + #[structopt(short, long)] + pub query: Option, + + /// Common options + #[structopt(flatten)] + common: CommonOpt, + + /// Path to data files + #[structopt(parse(from_os_str), required = true, short = "p", long = "path")] + path: PathBuf, + + /// Path to query files + #[structopt(parse(from_os_str), required = true, short = "Q", long = "query_path")] + query_path: PathBuf, + + /// Load the data into a MemTable before executing the query + #[structopt(short = "m", long = "mem-table")] + mem_table: bool, + + /// Path to machine readable output file + #[structopt(parse(from_os_str), short = "o", long = "output")] + output_path: Option, + + /// Whether to disable collection of statistics (and cost based optimizations) or not. + #[structopt(short = "S", long = "disable-statistics")] + disable_statistics: bool, + + /// If true then hash join used, if false then sort merge join + /// True by default. + #[structopt(short = "j", long = "prefer_hash_join", default_value = "true")] + prefer_hash_join: BoolDefaultTrue, + + /// If true then Piecewise Merge Join can be used, if false then it will opt for Nested Loop Join + /// False by default. 
+ #[structopt( + short = "w", + long = "enable_piecewise_merge_join", + default_value = "false" + )] + enable_piecewise_merge_join: BoolDefaultTrue, + + /// Mark the first column of each table as sorted in ascending order. + /// The tables should have been created with the `--sort` option for this to have any effect. + #[structopt(short = "t", long = "sorted")] + sorted: bool, +} + +impl RunOpt { + pub async fn run(self) -> Result<()> { + println!("Running benchmarks with the following options: {self:?}"); + let query_range = match self.query { + Some(query_id) => query_id..=query_id, + None => TPCDS_QUERY_START_ID..=TPCDS_QUERY_END_ID, + }; + + let mut benchmark_run = BenchmarkRun::new(); + let mut config = self + .common + .config()? + .with_collect_statistics(!self.disable_statistics); + config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; + config.options_mut().optimizer.enable_piecewise_merge_join = + self.enable_piecewise_merge_join; + let rt_builder = self.common.runtime_env_builder()?; + let ctx = SessionContext::new_with_config_rt(config, rt_builder.build_arc()?); + // register tables + self.register_tables(&ctx).await?; + + for query_id in query_range { + benchmark_run.start_new_case(&format!("Query {query_id}")); + let query_run = self.benchmark_query(query_id, &ctx).await; + match query_run { + Ok(query_results) => { + for iter in query_results { + benchmark_run.write_iter(iter.elapsed, iter.row_count); + } + } + Err(e) => { + benchmark_run.mark_failed(); + eprintln!("Query {query_id} failed: {e}"); + } + } + } + benchmark_run.maybe_write_json(self.output_path.as_ref())?; + benchmark_run.maybe_print_failures(); + Ok(()) + } + + async fn benchmark_query( + &self, + query_id: usize, + ctx: &SessionContext, + ) -> Result> { + let mut millis = vec![]; + // run benchmark + let mut query_results = vec![]; + + let sql = &get_query_sql(self.query_path.to_str().unwrap(), query_id)?; + + if self.common.debug { + println!("=== SQL for query {query_id} ===\n{}\n", sql.join(";\n")); + } + + for i in 0..self.iterations() { + let start = Instant::now(); + + // query 15 is special, with 3 statements. the second statement is the one from which we + // want to capture the results + let mut result = vec![]; + + for query in sql { + result = self.execute_query(ctx, query).await?; + } + + let elapsed = start.elapsed(); + let ms = elapsed.as_secs_f64() * 1000.0; + millis.push(ms); + info!("output:\n\n{}\n\n", pretty_format_batches(&result)?); + let row_count = result.iter().map(|b| b.num_rows()).sum(); + println!( + "Query {query_id} iteration {i} took {ms:.1} ms and returned {row_count} rows" + ); + query_results.push(QueryResult { elapsed, row_count }); + } + + let avg = millis.iter().sum::() / millis.len() as f64; + println!("Query {query_id} avg time: {avg:.2} ms"); + + // Print memory stats using mimalloc (only when compiled with --features mimalloc_extended) + print_memory_stats(); + + Ok(query_results) + } + + async fn register_tables(&self, ctx: &SessionContext) -> Result<()> { + for table in TPCDS_TABLES { + let table_provider = { self.get_table(ctx, table).await? 
}; + + if self.mem_table { + println!("Loading table '{table}' into memory"); + let start = Instant::now(); + let memtable = + MemTable::load(table_provider, Some(self.partitions()), &ctx.state()) + .await?; + println!( + "Loaded table '{}' into memory in {} ms", + table, + start.elapsed().as_millis() + ); + ctx.register_table(*table, Arc::new(memtable))?; + } else { + ctx.register_table(*table, table_provider)?; + } + } + Ok(()) + } + + async fn execute_query( + &self, + ctx: &SessionContext, + sql: &str, + ) -> Result> { + let debug = self.common.debug; + let plan = ctx.sql(sql).await?; + let (state, plan) = plan.into_parts(); + + if debug { + println!("=== Logical plan ===\n{plan}\n"); + } + + let plan = state.optimize(&plan)?; + if debug { + println!("=== Optimized logical plan ===\n{plan}\n"); + } + let physical_plan = state.create_physical_plan(&plan).await?; + if debug { + println!( + "=== Physical plan ===\n{}\n", + displayable(physical_plan.as_ref()).indent(true) + ); + } + let result = collect(physical_plan.clone(), state.task_ctx()).await?; + if debug { + println!( + "=== Physical plan with metrics ===\n{}\n", + DisplayableExecutionPlan::with_metrics(physical_plan.as_ref()) + .indent(true) + ); + if !result.is_empty() { + // do not call print_batches if there are no batches as the result is confusing + // and makes it look like there is a batch with no columns + pretty::print_batches(&result)?; + } + } + Ok(result) + } + + async fn get_table( + &self, + ctx: &SessionContext, + table: &str, + ) -> Result> { + let path = self.path.to_str().unwrap(); + let target_partitions = self.partitions(); + + // Obtain a snapshot of the SessionState + let state = ctx.state(); + let path = format!("{path}/{table}.parquet"); + + // Check if the file exists + if !std::path::Path::new(&path).exists() { + eprintln!("Warning registering {table}: Table file does not exist: {path}"); + } + + let format = ParquetFormat::default() + .with_options(ctx.state().table_options().parquet.clone()); + + let table_path = ListingTableUrl::parse(path)?; + let options = ListingOptions::new(Arc::new(format)) + .with_file_extension(DEFAULT_PARQUET_EXTENSION) + .with_target_partitions(target_partitions) + .with_collect_stat(state.config().collect_statistics()); + let schema = options.infer_schema(&state, &table_path).await?; + + if self.common.debug { + println!( + "Inferred schema from {table_path} for table '{table}':\n{schema:#?}\n" + ); + } + + let options = if self.sorted { + let key_column_name = schema.fields()[0].name(); + options + .with_file_sort_order(vec![vec![col(key_column_name).sort(true, false)]]) + } else { + options + }; + + let config = ListingTableConfig::new(table_path) + .with_listing_options(options) + .with_schema(schema); + + Ok(Arc::new(ListingTable::try_new(config)?)) + } + + fn iterations(&self) -> usize { + self.common.iterations + } + + fn partitions(&self) -> usize { + self.common + .partitions + .unwrap_or_else(get_available_parallelism) + } +} diff --git a/benchmarks/src/tpch/convert.rs b/benchmarks/src/tpch/convert.rs deleted file mode 100644 index 5219e09cd3052..0000000000000 --- a/benchmarks/src/tpch/convert.rs +++ /dev/null @@ -1,162 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::select_expr::SelectExpr; -use datafusion_common::instant::Instant; -use std::fs; -use std::path::{Path, PathBuf}; - -use datafusion::common::not_impl_err; - -use super::get_tbl_tpch_table_schema; -use super::TPCH_TABLES; -use datafusion::error::Result; -use datafusion::prelude::*; -use parquet::basic::Compression; -use parquet::file::properties::WriterProperties; -use structopt::StructOpt; - -/// Convert tpch .slt files to .parquet or .csv files -#[derive(Debug, StructOpt)] -pub struct ConvertOpt { - /// Path to csv files - #[structopt(parse(from_os_str), required = true, short = "i", long = "input")] - input_path: PathBuf, - - /// Output path - #[structopt(parse(from_os_str), required = true, short = "o", long = "output")] - output_path: PathBuf, - - /// Output file format: `csv` or `parquet` - #[structopt(short = "f", long = "format")] - file_format: String, - - /// Compression to use when writing Parquet files - #[structopt(short = "c", long = "compression", default_value = "zstd")] - compression: String, - - /// Number of partitions to produce - #[structopt(short = "n", long = "partitions", default_value = "1")] - partitions: usize, - - /// Batch size when reading CSV or Parquet files - #[structopt(short = "s", long = "batch-size", default_value = "8192")] - batch_size: usize, - - /// Sort each table by its first column in ascending order. - #[structopt(short = "t", long = "sort")] - sort: bool, -} - -impl ConvertOpt { - pub async fn run(self) -> Result<()> { - let compression = self.compression()?; - - let input_path = self.input_path.to_str().unwrap(); - let output_path = self.output_path.to_str().unwrap(); - - let output_root_path = Path::new(output_path); - for table in TPCH_TABLES { - let start = Instant::now(); - let schema = get_tbl_tpch_table_schema(table); - let key_column_name = schema.fields()[0].name(); - - let input_path = format!("{input_path}/{table}.tbl"); - let options = CsvReadOptions::new() - .schema(&schema) - .has_header(false) - .delimiter(b'|') - .file_extension(".tbl"); - let options = if self.sort { - // indicated that the file is already sorted by its first column to speed up the conversion - options - .file_sort_order(vec![vec![col(key_column_name).sort(true, false)]]) - } else { - options - }; - - let config = SessionConfig::new().with_batch_size(self.batch_size); - let ctx = SessionContext::new_with_config(config); - - // build plan to read the TBL file - let mut csv = ctx.read_csv(&input_path, options).await?; - - // Select all apart from the padding column - let selection = csv - .schema() - .iter() - .take(schema.fields.len() - 1) - .map(Expr::from) - .map(SelectExpr::from) - .collect::>(); - - csv = csv.select(selection)?; - // optionally, repartition the file - let partitions = self.partitions; - if partitions > 1 { - csv = csv.repartition(Partitioning::RoundRobinBatch(partitions))? 
- } - let csv = if self.sort { - csv.sort_by(vec![col(key_column_name)])? - } else { - csv - }; - - // create the physical plan - let csv = csv.create_physical_plan().await?; - - let output_path = output_root_path.join(table); - let output_path = output_path.to_str().unwrap().to_owned(); - fs::create_dir_all(&output_path)?; - println!( - "Converting '{}' to {} files in directory '{}'", - &input_path, self.file_format, &output_path - ); - match self.file_format.as_str() { - "csv" => ctx.write_csv(csv, output_path).await?, - "parquet" => { - let props = WriterProperties::builder() - .set_compression(compression) - .build(); - ctx.write_parquet(csv, output_path, Some(props)).await? - } - other => { - return not_impl_err!("Invalid output format: {other}"); - } - } - println!("Conversion completed in {} ms", start.elapsed().as_millis()); - } - - Ok(()) - } - - /// return the compression method to use when writing parquet - fn compression(&self) -> Result { - Ok(match self.compression.as_str() { - "none" => Compression::UNCOMPRESSED, - "snappy" => Compression::SNAPPY, - "brotli" => Compression::BROTLI(Default::default()), - "gzip" => Compression::GZIP(Default::default()), - "lz4" => Compression::LZ4, - "lz0" => Compression::LZO, - "zstd" => Compression::ZSTD(Default::default()), - other => { - return not_impl_err!("Invalid compression format: {other}"); - } - }) - } -} diff --git a/benchmarks/src/tpch/mod.rs b/benchmarks/src/tpch/mod.rs index 233ea94a05c1a..681aa0a403ee1 100644 --- a/benchmarks/src/tpch/mod.rs +++ b/benchmarks/src/tpch/mod.rs @@ -27,9 +27,6 @@ use std::fs; mod run; pub use run::RunOpt; -mod convert; -pub use convert::ConvertOpt; - pub const TPCH_TABLES: &[&str] = &[ "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region", ]; diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index cc59b78030360..65bb9594f00a6 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -19,16 +19,16 @@ use std::path::PathBuf; use std::sync::Arc; use super::{ - get_query_sql, get_tbl_tpch_table_schema, get_tpch_table_schema, TPCH_QUERY_END_ID, - TPCH_QUERY_START_ID, TPCH_TABLES, + TPCH_QUERY_END_ID, TPCH_QUERY_START_ID, TPCH_TABLES, get_query_sql, + get_tbl_tpch_table_schema, get_tpch_table_schema, }; -use crate::util::{print_memory_stats, BenchmarkRun, CommonOpt, QueryResult}; +use crate::util::{BenchmarkRun, CommonOpt, QueryResult, print_memory_stats}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::{self, pretty_format_batches}; +use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::file_format::csv::CsvFormat; use datafusion::datasource::file_format::parquet::ParquetFormat; -use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; @@ -93,9 +93,9 @@ pub struct RunOpt { prefer_hash_join: BoolDefaultTrue, /// If true then Piecewise Merge Join can be used, if false then it will opt for Nested Loop Join - /// True by default. + /// False by default. 
#[structopt( - short = "j", + short = "w", long = "enable_piecewise_merge_join", default_value = "false" )] diff --git a/benchmarks/src/util/memory.rs b/benchmarks/src/util/memory.rs index 944239df31cfd..11b96ef227756 100644 --- a/benchmarks/src/util/memory.rs +++ b/benchmarks/src/util/memory.rs @@ -19,7 +19,7 @@ pub fn print_memory_stats() { #[cfg(all(feature = "mimalloc", feature = "mimalloc_extended"))] { - use datafusion::execution::memory_pool::human_readable_size; + use datafusion_common::human_readable_size; let mut peak_rss = 0; let mut peak_commit = 0; let mut page_faults = 0; diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs index 6627a287dfcd4..b1d5bc99fb406 100644 --- a/benchmarks/src/util/options.rs +++ b/benchmarks/src/util/options.rs @@ -105,7 +105,7 @@ impl CommonOpt { return Err(DataFusionError::Configuration(format!( "Invalid memory pool type: {}", self.mem_pool_type - ))) + ))); } }; rt_builder = rt_builder diff --git a/benchmarks/src/util/run.rs b/benchmarks/src/util/run.rs index 764ea648ff725..df17674e62961 100644 --- a/benchmarks/src/util/run.rs +++ b/benchmarks/src/util/run.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion::{error::Result, DATAFUSION_VERSION}; +use datafusion::{DATAFUSION_VERSION, error::Result}; use datafusion_common::utils::get_available_parallelism; use serde::{Serialize, Serializer}; use serde_json::Value; diff --git a/ci/scripts/check_examples_docs.sh b/ci/scripts/check_examples_docs.sh new file mode 100755 index 0000000000000..37b0cc088df4c --- /dev/null +++ b/ci/scripts/check_examples_docs.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +EXAMPLES_DIR="datafusion-examples/examples" +README="datafusion-examples/README.md" + +# ffi examples are skipped because they were not part of the recent example +# consolidation work and do not follow the new grouping and execution pattern. +# They are not documented in the README using the new structure, so including +# them here would cause false CI failures. +SKIP_LIST=("ffi") + +missing=0 + +skip() { + local value="$1" + for item in "${SKIP_LIST[@]}"; do + if [[ "$item" == "$value" ]]; then + return 0 + fi + done + return 1 +} + +# collect folder names +folders=$(find "$EXAMPLES_DIR" -mindepth 1 -maxdepth 1 -type d -exec basename {} \;) + +# collect group names from README headers +groups=$(grep "^### Group:" "$README" | sed -E 's/^### Group: `([^`]+)`.*/\1/') + +for folder in $folders; do + if skip "$folder"; then + echo "Skipped group: $folder" + continue + fi + + if ! 
echo "$groups" | grep -qx "$folder"; then + echo "Missing README entry for example group: $folder" + missing=1 + fi +done + +if [[ $missing -eq 1 ]]; then + echo "README is out of sync with examples" + exit 1 +fi diff --git a/ci/scripts/doc_prettier_check.sh b/ci/scripts/doc_prettier_check.sh new file mode 100755 index 0000000000000..d94a0d1c96171 --- /dev/null +++ b/ci/scripts/doc_prettier_check.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")" + +MODE="--check" +ACTION="Checking" +if [ $# -gt 0 ]; then + if [ "$1" = "--write" ]; then + MODE="--write" + ACTION="Formatting" + else + echo "Usage: $0 [--write]" >&2 + exit 1 + fi +fi + +echo "$SCRIPT_PATH: $ACTION documents with prettier" + +# Ensure `npx` is available +if ! command -v npx >/dev/null 2>&1; then + echo "npx is required to run the prettier check. Install Node.js (e.g., brew install node) and re-run." >&2 + exit 1 +fi + +# Ignore subproject CHANGELOG.md because it is machine generated +npx prettier@2.7.1 $MODE \ + '{datafusion,datafusion-cli,datafusion-examples,dev,docs}/**/*.md' \ + '!datafusion/CHANGELOG.md' \ + README.md \ + CONTRIBUTING.md +status=$? + +if [ $status -ne 0 ]; then + if [ "$MODE" = "--check" ]; then + echo "Prettier check failed. Re-run with --write (e.g., ./ci/scripts/doc_prettier_check.sh --write) to format files, commit the changes, and re-run the check." >&2 + else + echo "Prettier format failed. Files may have been modified; commit any changes and re-run." >&2 + fi + exit $status +fi diff --git a/ci/scripts/rust_clippy.sh b/ci/scripts/rust_clippy.sh index 6a00ad8109561..aa994bc2b8c8a 100755 --- a/ci/scripts/rust_clippy.sh +++ b/ci/scripts/rust_clippy.sh @@ -18,4 +18,4 @@ # under the License. set -ex -cargo clippy --all-targets --workspace --features avro,pyarrow,integration-tests,extended_tests -- -D warnings \ No newline at end of file +cargo clippy --all-targets --workspace --features avro,integration-tests,extended_tests -- -D warnings diff --git a/ci/scripts/rust_example.sh b/ci/scripts/rust_example.sh index c3efcf2cf2e92..7a5f7825b4e6d 100755 --- a/ci/scripts/rust_example.sh +++ b/ci/scripts/rust_example.sh @@ -25,12 +25,26 @@ export CARGO_PROFILE_CI_STRIP=true cd datafusion-examples/examples/ cargo build --profile ci --examples -files=$(ls .) -for filename in $files -do - example_name=`basename $filename ".rs"` - # Skip tests that rely on external storage and flight - if [ ! 
-d $filename ]; then - cargo run --profile ci --example $example_name - fi +SKIP_LIST=("external_dependency" "flight" "ffi") + +skip_example() { + local name="$1" + for skip in "${SKIP_LIST[@]}"; do + if [ "$name" = "$skip" ]; then + return 0 + fi + done + return 1 +} + +for dir in */; do + example_name=$(basename "$dir") + + if skip_example "$example_name"; then + echo "Skipping $example_name" + continue + fi + + echo "Running example group: $example_name" + cargo run --profile ci --example "$example_name" -- all done diff --git a/ci/scripts/typos_check.sh b/ci/scripts/typos_check.sh new file mode 100755 index 0000000000000..a3a4a893213f7 --- /dev/null +++ b/ci/scripts/typos_check.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex +# To use this script, you must have installed `typos`, for example: +# cargo install typos-cli --locked --version 1.37.0 +typos --config typos.toml diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index f3069b492352d..67cb10081ca47 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -37,10 +37,10 @@ backtrace = ["datafusion/backtrace"] [dependencies] arrow = { workspace = true } async-trait = { workspace = true } -aws-config = "1.8.7" +aws-config = "1.8.12" aws-credential-types = "1.2.7" chrono = { workspace = true } -clap = { version = "4.5.50", features = ["cargo", "derive"] } +clap = { version = "4.5.53", features = ["cargo", "derive"] } datafusion = { workspace = true, features = [ "avro", "compression", diff --git a/datafusion-cli/examples/cli-session-context.rs b/datafusion-cli/examples/cli-session-context.rs index bd2dbb736781f..6095072163870 100644 --- a/datafusion-cli/examples/cli-session-context.rs +++ b/datafusion-cli/examples/cli-session-context.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use datafusion::{ dataframe::DataFrame, error::DataFusionError, - execution::{context::SessionState, TaskContext}, + execution::{TaskContext, context::SessionState}, logical_expr::{LogicalPlan, LogicalPlanBuilder}, prelude::SessionContext, }; diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index 20d62eabc3901..63b055388fdbe 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -18,13 +18,13 @@ use std::any::Any; use std::sync::{Arc, Weak}; -use crate::object_storage::{get_object_store, AwsOptions, GcpOptions}; +use crate::object_storage::{AwsOptions, GcpOptions, get_object_store}; use datafusion::catalog::{CatalogProvider, CatalogProviderList, SchemaProvider}; use datafusion::common::plan_datafusion_err; -use datafusion::datasource::listing::ListingTableUrl; use datafusion::datasource::TableProvider; +use datafusion::datasource::listing::ListingTableUrl; use datafusion::error::Result; use 
datafusion::execution::context::SessionState; use datafusion::execution::session_state::SessionStateBuilder; @@ -152,10 +152,10 @@ impl SchemaProvider for DynamicObjectStoreSchemaProvider { async fn table(&self, name: &str) -> Result>> { let inner_table = self.inner.table(name).await; - if inner_table.is_ok() { - if let Some(inner_table) = inner_table? { - return Ok(Some(inner_table)); - } + if inner_table.is_ok() + && let Some(inner_table) = inner_table? + { + return Ok(Some(inner_table)); } // if the inner schema provider didn't have a table by @@ -219,12 +219,12 @@ impl SchemaProvider for DynamicObjectStoreSchemaProvider { } pub fn substitute_tilde(cur: String) -> String { - if let Some(usr_dir_path) = home_dir() { - if let Some(usr_dir) = usr_dir_path.to_str() { - if cur.starts_with('~') && !usr_dir.is_empty() { - return cur.replacen('~', usr_dir, 1); - } - } + if let Some(usr_dir_path) = home_dir() + && let Some(usr_dir) = usr_dir_path.to_str() + && cur.starts_with('~') + && !usr_dir.is_empty() + { + return cur.replacen('~', usr_dir, 1); } cur } @@ -359,10 +359,12 @@ mod tests { } else { "/home/user" }; - env::set_var( - if cfg!(windows) { "USERPROFILE" } else { "HOME" }, - test_home_path, - ); + unsafe { + env::set_var( + if cfg!(windows) { "USERPROFILE" } else { "HOME" }, + test_home_path, + ); + } let input = "~/Code/datafusion/benchmarks/data/tpch_sf1/part/part-0.parquet"; let expected = PathBuf::from(test_home_path) .join("Code") @@ -376,12 +378,16 @@ mod tests { .to_string(); let actual = substitute_tilde(input.to_string()); assert_eq!(actual, expected); - match original_home { - Some(home_path) => env::set_var( - if cfg!(windows) { "USERPROFILE" } else { "HOME" }, - home_path.to_str().unwrap(), - ), - None => env::remove_var(if cfg!(windows) { "USERPROFILE" } else { "HOME" }), + unsafe { + match original_home { + Some(home_path) => env::set_var( + if cfg!(windows) { "USERPROFILE" } else { "HOME" }, + home_path.to_str().unwrap(), + ), + None => { + env::remove_var(if cfg!(windows) { "USERPROFILE" } else { "HOME" }) + } + } } } } diff --git a/datafusion-cli/src/cli_context.rs b/datafusion-cli/src/cli_context.rs index 516929ebacf19..a6320f03fe4de 100644 --- a/datafusion-cli/src/cli_context.rs +++ b/datafusion-cli/src/cli_context.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use datafusion::{ dataframe::DataFrame, error::DataFusionError, - execution::{context::SessionState, TaskContext}, + execution::{TaskContext, context::SessionState}, logical_expr::LogicalPlan, prelude::SessionContext, }; diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index 3fbfe5680cfcd..8aaa8025d1c3a 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -19,7 +19,7 @@ use crate::cli_context::CliSessionContext; use crate::exec::{exec_and_print, exec_from_lines}; -use crate::functions::{display_all_functions, Function}; +use crate::functions::{Function, display_all_functions}; use crate::print_format::PrintFormat; use crate::print_options::PrintOptions; use clap::ValueEnum; diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index d079a88a6440e..2b8385ac2d89c 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -35,19 +35,19 @@ use datafusion::execution::memory_pool::MemoryConsumer; use datafusion::logical_expr::{DdlStatement, LogicalPlan}; use datafusion::physical_plan::execution_plan::EmissionType; use datafusion::physical_plan::spill::get_record_batch_memory_size; -use datafusion::physical_plan::{execute_stream, 
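The `datafusion-cli/src/catalog.rs` hunk above shows two Rust 2024 edition migrations that recur throughout this diff: nested `if let` blocks collapsed into let-chains, and `std::env::set_var` wrapped in `unsafe`. A self-contained sketch of both, using a hypothetical `home_prefix` helper loosely modeled on `substitute_tilde`:

```rust
use std::env;

/// Expand a leading `~` using $HOME (illustrative only).
fn home_prefix(path: &str) -> Option<String> {
    // Let-chain: every condition lives in one `if`, no nesting required.
    if let Some(home_os) = env::var_os("HOME")
        && let Some(home) = home_os.to_str()
        && path.starts_with('~')
        && !home.is_empty()
    {
        return Some(path.replacen('~', home, 1));
    }
    None
}

fn main() {
    // SAFETY: single-threaded example; `set_var` is an `unsafe fn` in the
    // 2024 edition because mutating the environment races with readers.
    unsafe { env::set_var("HOME", "/home/user") };
    assert_eq!(
        home_prefix("~/data/part-0.parquet").as_deref(),
        Some("/home/user/data/part-0.parquet")
    );
    println!("ok");
}
```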
ExecutionPlanProperties}; +use datafusion::physical_plan::{ExecutionPlanProperties, execute_stream}; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser; use datafusion::sql::sqlparser::dialect::dialect_from_str; use futures::StreamExt; use log::warn; use object_store::Error::Generic; -use rustyline::error::ReadlineError; use rustyline::Editor; +use rustyline::error::ReadlineError; use std::collections::HashMap; use std::fs::File; -use std::io::prelude::*; use std::io::BufReader; +use std::io::prelude::*; use tokio::signal; /// run and execute SQL statements and commands, against a context with the given print options @@ -153,7 +153,7 @@ pub async fn exec_from_repl( } } else { eprintln!( - "'\\{}' is not a valid command", + "'\\{}' is not a valid command, you can use '\\?' to see all commands", &line[1..] ); } @@ -168,7 +168,10 @@ pub async fn exec_from_repl( } } } else { - eprintln!("'\\{}' is not a valid command", &line[1..]); + eprintln!( + "'\\{}' is not a valid command, you can use '\\?' to see all commands", + &line[1..] + ); } } Ok(line) => { @@ -334,7 +337,9 @@ impl StatementExecutor { if matches!(err.as_ref(), Generic { store, source: _ } if "S3".eq_ignore_ascii_case(store)) && self.statement_for_retry.is_some() => { - warn!("S3 region is incorrect, auto-detecting the correct region (this may be slow). Consider updating your region configuration."); + warn!( + "S3 region is incorrect, auto-detecting the correct region (this may be slow). Consider updating your region configuration." + ); let plan = create_plan(ctx, self.statement_for_retry.take().unwrap(), true) .await?; @@ -699,8 +704,7 @@ mod tests { #[tokio::test] async fn create_object_store_table_gcs() -> Result<()> { let service_account_path = "fake_service_account_path"; - let service_account_key = - "{\"private_key\": \"fake_private_key.pem\",\"client_email\":\"fake_client_email\", \"private_key_id\":\"id\"}"; + let service_account_key = "{\"private_key\": \"fake_private_key.pem\",\"client_email\":\"fake_client_email\", \"private_key_id\":\"id\"}"; let application_credentials_path = "fake_application_credentials_path"; let location = "gcs://bucket/path/file.parquet"; @@ -713,7 +717,9 @@ mod tests { assert!(err.to_string().contains("os error 2")); // for service_account_key - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_key' '{service_account_key}') LOCATION '{location}'"); + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_key' '{service_account_key}') LOCATION '{location}'" + ); let err = create_external_table_test(location, &sql) .await .unwrap_err() @@ -748,8 +754,9 @@ mod tests { let location = "path/to/file.cvs"; // Test with format options - let sql = - format!("CREATE EXTERNAL TABLE test STORED AS CSV LOCATION '{location}' OPTIONS('format.has_header' 'true')"); + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS CSV LOCATION '{location}' OPTIONS('format.has_header' 'true')" + ); create_external_table_test(location, &sql).await.unwrap(); Ok(()) diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index d23b12469e385..a45d57e8e952d 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -27,9 +27,9 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; use datafusion::catalog::{Session, TableFunctionImpl}; -use 
datafusion::common::{plan_err, Column}; -use datafusion::datasource::memory::MemorySourceConfig; +use datafusion::common::{Column, plan_err}; use datafusion::datasource::TableProvider; +use datafusion::datasource::memory::MemorySourceConfig; use datafusion::error::Result; use datafusion::execution::cache::cache_manager::CacheManager; use datafusion::logical_expr::Expr; @@ -581,3 +581,119 @@ impl TableFunctionImpl for MetadataCacheFunc { Ok(Arc::new(metadata_cache)) } } + +/// STATISTICS_CACHE table function +#[derive(Debug)] +struct StatisticsCacheTable { + schema: SchemaRef, + batch: RecordBatch, +} + +#[async_trait] +impl TableProvider for StatisticsCacheTable { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn schema(&self) -> arrow::datatypes::SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> datafusion::logical_expr::TableType { + datafusion::logical_expr::TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> Result> { + Ok(MemorySourceConfig::try_new_exec( + &[vec![self.batch.clone()]], + TableProvider::schema(self), + projection.cloned(), + )?) + } +} + +#[derive(Debug)] +pub struct StatisticsCacheFunc { + cache_manager: Arc, +} + +impl StatisticsCacheFunc { + pub fn new(cache_manager: Arc) -> Self { + Self { cache_manager } + } +} + +impl TableFunctionImpl for StatisticsCacheFunc { + fn call(&self, exprs: &[Expr]) -> Result> { + if !exprs.is_empty() { + return plan_err!("statistics_cache should have no arguments"); + } + + let schema = Arc::new(Schema::new(vec![ + Field::new("path", DataType::Utf8, false), + Field::new( + "file_modified", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new("file_size_bytes", DataType::UInt64, false), + Field::new("e_tag", DataType::Utf8, true), + Field::new("version", DataType::Utf8, true), + Field::new("num_rows", DataType::Utf8, false), + Field::new("num_columns", DataType::UInt64, false), + Field::new("table_size_bytes", DataType::Utf8, false), + Field::new("statistics_size_bytes", DataType::UInt64, false), + ])); + + // construct record batch from metadata + let mut path_arr = vec![]; + let mut file_modified_arr = vec![]; + let mut file_size_bytes_arr = vec![]; + let mut e_tag_arr = vec![]; + let mut version_arr = vec![]; + let mut num_rows_arr = vec![]; + let mut num_columns_arr = vec![]; + let mut table_size_bytes_arr = vec![]; + let mut statistics_size_bytes_arr = vec![]; + + if let Some(file_statistics_cache) = self.cache_manager.get_file_statistic_cache() + { + for (path, entry) in file_statistics_cache.list_entries() { + path_arr.push(path.to_string()); + file_modified_arr + .push(Some(entry.object_meta.last_modified.timestamp_millis())); + file_size_bytes_arr.push(entry.object_meta.size); + e_tag_arr.push(entry.object_meta.e_tag); + version_arr.push(entry.object_meta.version); + num_rows_arr.push(entry.num_rows.to_string()); + num_columns_arr.push(entry.num_columns as u64); + table_size_bytes_arr.push(entry.table_size_bytes.to_string()); + statistics_size_bytes_arr.push(entry.statistics_size_bytes as u64); + } + } + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(path_arr)), + Arc::new(TimestampMillisecondArray::from(file_modified_arr)), + Arc::new(UInt64Array::from(file_size_bytes_arr)), + Arc::new(StringArray::from(e_tag_arr)), + Arc::new(StringArray::from(version_arr)), + Arc::new(StringArray::from(num_rows_arr)), + 
Arc::new(UInt64Array::from(num_columns_arr)), + Arc::new(StringArray::from(table_size_bytes_arr)), + Arc::new(UInt64Array::from(statistics_size_bytes_arr)), + ], + )?; + + let statistics_cache = StatisticsCacheTable { schema, batch }; + Ok(Arc::new(statistics_cache)) + } +} diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 219637b3460e6..df7afc14048b9 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -67,7 +67,7 @@ impl CliHelper { return Ok(ValidationResult::Invalid(Some(format!( " 🤔 Invalid dialect: {}", self.dialect - )))) + )))); } }; let lines = split_from_semicolon(sql); @@ -121,10 +121,10 @@ impl Hinter for CliHelper { fn is_open_quote_for_location(line: &str, pos: usize) -> bool { let mut sql = line[..pos].to_string(); sql.push('\''); - if let Ok(stmts) = DFParser::parse_sql(&sql) { - if let Some(Statement::CreateExternalTable(_)) = stmts.back() { - return true; - } + if let Ok(stmts) = DFParser::parse_sql(&sql) + && let Some(Statement::CreateExternalTable(_)) = stmts.back() + { + return true; } false } diff --git a/datafusion-cli/src/highlighter.rs b/datafusion-cli/src/highlighter.rs index f4e57a2e3593a..912a13916a5bd 100644 --- a/datafusion-cli/src/highlighter.rs +++ b/datafusion-cli/src/highlighter.rs @@ -23,7 +23,7 @@ use std::{ }; use datafusion::sql::sqlparser::{ - dialect::{dialect_from_str, Dialect, GenericDialect}, + dialect::{Dialect, GenericDialect, dialect_from_str}, keywords::Keyword, tokenizer::{Token, Tokenizer}, }; @@ -94,8 +94,8 @@ impl Color { #[cfg(test)] mod tests { - use super::config::Dialect; use super::SyntaxHighlighter; + use super::config::Dialect; use rustyline::highlight::Highlighter; #[test] diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 09fa8ef15af84..8f69ae477904c 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -31,16 +31,17 @@ use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::logical_expr::ExplainFormat; use datafusion::prelude::SessionContext; use datafusion_cli::catalog::DynamicObjectStoreCatalog; -use datafusion_cli::functions::{MetadataCacheFunc, ParquetMetadataFunc}; +use datafusion_cli::functions::{ + MetadataCacheFunc, ParquetMetadataFunc, StatisticsCacheFunc, +}; use datafusion_cli::object_storage::instrumented::{ InstrumentedObjectStoreMode, InstrumentedObjectStoreRegistry, }; use datafusion_cli::{ - exec, + DATAFUSION_CLI_VERSION, exec, pool_type::PoolType, print_format::PrintFormat, print_options::{MaxRows, PrintOptions}, - DATAFUSION_CLI_VERSION, }; use clap::Parser; @@ -244,6 +245,14 @@ async fn main_inner() -> Result<()> { )), ); + // register `statistics_cache` table function to get the contents of the file statistics cache + ctx.register_udtf( + "statistics_cache", + Arc::new(StatisticsCacheFunc::new( + ctx.task_ctx().runtime_env().cache_manager.clone(), + )), + ); + let mut print_options = PrintOptions { format: args.format, quiet: args.quiet, @@ -423,7 +432,13 @@ pub fn extract_disk_limit(size: &str) -> Result { #[cfg(test)] mod tests { use super::*; - use datafusion::{common::test_util::batches_to_string, prelude::ParquetReadOptions}; + use datafusion::{ + common::test_util::batches_to_string, + execution::cache::{ + cache_manager::CacheManagerConfig, cache_unit::DefaultFileStatisticsCache, + }, + prelude::ParquetReadOptions, + }; use insta::assert_snapshot; fn assert_conversion(input: &str, expected: Result) { @@ -488,8 +503,7 @@ mod tests { ctx.register_udtf("parquet_metadata", 
Arc::new(ParquetMetadataFunc {})); // input with single quote - let sql = - "SELECT * FROM parquet_metadata('../datafusion/core/tests/data/fixed_size_list_array.parquet')"; + let sql = "SELECT * FROM parquet_metadata('../datafusion/core/tests/data/fixed_size_list_array.parquet')"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; @@ -502,8 +516,7 @@ mod tests { "#); // input with double quote - let sql = - "SELECT * FROM parquet_metadata(\"../datafusion/core/tests/data/fixed_size_list_array.parquet\")"; + let sql = "SELECT * FROM parquet_metadata(\"../datafusion/core/tests/data/fixed_size_list_array.parquet\")"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; assert_snapshot!(batches_to_string(&rbs), @r#" @@ -523,8 +536,7 @@ mod tests { ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {})); // input with string columns - let sql = - "SELECT * FROM parquet_metadata('../parquet-testing/data/data_index_bloom_encoding_stats.parquet')"; + let sql = "SELECT * FROM parquet_metadata('../parquet-testing/data/data_index_bloom_encoding_stats.parquet')"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; @@ -592,9 +604,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 6957 | 2 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 996 | 2 | page_index=false | + | alltypes_plain.parquet | 1851 | 8882 | 2 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 269266 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 1347 | 2 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); @@ -623,12 +635,110 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 6957 | 5 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 996 | 3 | page_index=false | + | alltypes_plain.parquet | 1851 | 8882 | 5 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 269266 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 1347 | 3 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); Ok(()) } + + /// Shows that the statistics cache is not enabled by default yet + /// See https://github.com/apache/datafusion/issues/19217 + #[tokio::test] + async fn test_statistics_cache_default() -> Result<(), DataFusionError> { + let ctx = SessionContext::new(); + + ctx.register_udtf( + "statistics_cache", + Arc::new(StatisticsCacheFunc::new( + ctx.task_ctx().runtime_env().cache_manager.clone(), + )), + ); + + for filename in [ + "alltypes_plain", + "alltypes_tiny_pages", + "lz4_raw_compressed_larger", + ] { + ctx.sql( + format!( + "create external table {filename} + stored as parquet + location '../parquet-testing/data/{filename}.parquet'", + ) + .as_str(), + ) + 
.await? + .collect() + .await?; + } + + // When the cache manager creates a StatisticsCache by default, + // the contents will show up here + let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; + let df = ctx.sql(sql).await?; + let rbs = df.collect().await?; + assert_snapshot!(batches_to_string(&rbs),@r" + ++ + ++ + "); + + Ok(()) + } + + // Can be removed when https://github.com/apache/datafusion/issues/19217 is resolved + #[tokio::test] + async fn test_statistics_cache_override() -> Result<(), DataFusionError> { + // Install a specific StatisticsCache implementation + let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); + let cache_config = CacheManagerConfig::default() + .with_files_statistics_cache(Some(file_statistics_cache.clone())); + let runtime = RuntimeEnvBuilder::new() + .with_cache_manager(cache_config) + .build()?; + let config = SessionConfig::new().with_collect_statistics(true); + let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime)); + + ctx.register_udtf( + "statistics_cache", + Arc::new(StatisticsCacheFunc::new( + ctx.task_ctx().runtime_env().cache_manager.clone(), + )), + ); + + for filename in [ + "alltypes_plain", + "alltypes_tiny_pages", + "lz4_raw_compressed_larger", + ] { + ctx.sql( + format!( + "create external table {filename} + stored as parquet + location '../parquet-testing/data/{filename}.parquet'", + ) + .as_str(), + ) + .await? + .collect() + .await?; + } + + let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; + let df = ctx.sql(sql).await?; + let rbs = df.collect().await?; + assert_snapshot!(batches_to_string(&rbs),@r" + +-----------------------------------+-----------------+--------------+-------------+------------------+ + | filename | file_size_bytes | num_rows | num_columns | table_size_bytes | + +-----------------------------------+-----------------+--------------+-------------+------------------+ + | alltypes_plain.parquet | 1851 | Exact(8) | 11 | Absent | + | alltypes_tiny_pages.parquet | 454233 | Exact(7300) | 13 | Absent | + | lz4_raw_compressed_larger.parquet | 380836 | Exact(10000) | 1 | Absent | + +-----------------------------------+-----------------+--------------+-------------+------------------+ + "); + + Ok(()) + } } diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index e6e6be42c7ad0..3cee78a5b17cc 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -20,7 +20,7 @@ pub mod instrumented; use async_trait::async_trait; use aws_config::BehaviorVersion; use aws_credential_types::provider::{ - error::CredentialsError, ProvideCredentials, SharedCredentialsProvider, + ProvideCredentials, SharedCredentialsProvider, error::CredentialsError, }; use datafusion::{ common::{ @@ -33,12 +33,12 @@ use datafusion::{ }; use log::debug; use object_store::{ - aws::{AmazonS3Builder, AmazonS3ConfigKey, AwsCredential}, - gcp::GoogleCloudStorageBuilder, - http::HttpBuilder, ClientOptions, CredentialProvider, Error::Generic, ObjectStore, + aws::{AmazonS3Builder, AmazonS3ConfigKey, AwsCredential}, + gcp::GoogleCloudStorageBuilder, + http::HttpBuilder, }; use std::{ any::Any, @@ -124,14 +124,15 @@ pub async fn get_s3_object_store_builder( if let Some(endpoint) = endpoint { // Make a nicer error if the user hasn't allowed http and the 
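Putting the pieces of the new `statistics_cache` function together outside the test harness: a minimal sketch that installs a file statistics cache (required for now, per the issue referenced above), registers the UDTF the same way `datafusion-cli`'s `main` does, and queries it. Treating `datafusion-cli` as a library dependency, the `tokio` setup, and the table path are assumptions; the API calls are the ones used in the hunks above.

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::execution::cache::cache_manager::CacheManagerConfig;
use datafusion::execution::cache::cache_unit::DefaultFileStatisticsCache;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_cli::functions::StatisticsCacheFunc;

#[tokio::main]
async fn main() -> Result<()> {
    // Install an explicit file statistics cache; none is created by default
    // yet (see apache/datafusion#19217).
    let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default());
    let cache_config = CacheManagerConfig::default()
        .with_files_statistics_cache(Some(file_statistics_cache.clone()));
    let runtime = RuntimeEnvBuilder::new()
        .with_cache_manager(cache_config)
        .build()?;
    let config = SessionConfig::new().with_collect_statistics(true);
    let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime));

    // Expose the cache contents through the new table function.
    ctx.register_udtf(
        "statistics_cache",
        Arc::new(StatisticsCacheFunc::new(
            ctx.task_ctx().runtime_env().cache_manager.clone(),
        )),
    );

    // Register a Parquet table (placeholder path), then inspect what was cached.
    ctx.sql("CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'data/t.parquet'")
        .await?
        .collect()
        .await?;
    ctx.sql("SELECT path, num_rows, table_size_bytes FROM statistics_cache()")
        .await?
        .show()
        .await?;
    Ok(())
}
```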
endpoint // is http as the default message is "URL scheme is not allowed" - if let Ok(endpoint_url) = Url::try_from(endpoint.as_str()) { - if !matches!(allow_http, Some(true)) && endpoint_url.scheme() == "http" { - return config_err!( - "Invalid endpoint: {endpoint}. \ + if let Ok(endpoint_url) = Url::try_from(endpoint.as_str()) + && !matches!(allow_http, Some(true)) + && endpoint_url.scheme() == "http" + { + return config_err!( + "Invalid endpoint: {endpoint}. \ HTTP is not allowed for S3 endpoints. \ To allow HTTP, set 'aws.allow_http' to true" - ); - } + ); } builder = builder.with_endpoint(endpoint); @@ -586,8 +587,10 @@ mod tests { let location = "s3://bucket/path/FAKE/file.parquet"; // Set it to a non-existent file to avoid reading the default configuration file - std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); - std::env::set_var("AWS_SHARED_CREDENTIALS_FILE", "data/aws.credentials"); + unsafe { + std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); + std::env::set_var("AWS_SHARED_CREDENTIALS_FILE", "data/aws.credentials"); + } // No options let table_url = ListingTableUrl::parse(location)?; @@ -716,7 +719,10 @@ mod tests { .await .unwrap_err(); - assert_eq!(err.to_string().lines().next().unwrap_or_default(), "Invalid or Unsupported Configuration: Invalid endpoint: http://endpoint33. HTTP is not allowed for S3 endpoints. To allow HTTP, set 'aws.allow_http' to true"); + assert_eq!( + err.to_string().lines().next().unwrap_or_default(), + "Invalid or Unsupported Configuration: Invalid endpoint: http://endpoint33. HTTP is not allowed for S3 endpoints. To allow HTTP, set 'aws.allow_http' to true" + ); // Now add `allow_http` to the options and check if it works let sql = format!( @@ -746,7 +752,9 @@ mod tests { let expected_region = "eu-central-1"; let location = "s3://test-bucket/path/file.parquet"; // Set it to a non-existent file to avoid reading the default configuration file - std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); + unsafe { + std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); + } let table_url = ListingTableUrl::parse(location)?; let aws_options = AwsOptions { @@ -767,8 +775,8 @@ mod tests { } #[tokio::test] - async fn s3_object_store_builder_overrides_region_when_resolve_region_enabled( - ) -> Result<()> { + async fn s3_object_store_builder_overrides_region_when_resolve_region_enabled() + -> Result<()> { if let Err(DataFusionError::Execution(e)) = check_aws_envs().await { // Skip test if AWS envs are not set eprintln!("{e}"); @@ -806,7 +814,9 @@ mod tests { let table_url = ListingTableUrl::parse(location)?; let scheme = table_url.scheme(); - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'"); + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'" + ); let ctx = SessionContext::new(); ctx.register_table_options_extension_from_scheme(scheme); @@ -830,14 +840,15 @@ mod tests { #[tokio::test] async fn gcs_object_store_builder() -> Result<()> { let service_account_path = "fake_service_account_path"; - let service_account_key = - "{\"private_key\": \"fake_private_key.pem\",\"client_email\":\"fake_client_email\"}"; + let service_account_key = "{\"private_key\": \"fake_private_key.pem\",\"client_email\":\"fake_client_email\"}"; 
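The refactored endpoint check above is easy to lift out and exercise in isolation. A sketch assuming only the scheme/allow-http rule matters; the `validate_endpoint` helper name is made up, while the error text mirrors the one in the hunk:

```rust
use url::Url;

/// Reject plain-http S3 endpoints unless the user explicitly allowed them.
fn validate_endpoint(endpoint: &str, allow_http: Option<bool>) -> Result<(), String> {
    if let Ok(endpoint_url) = Url::try_from(endpoint)
        && !matches!(allow_http, Some(true))
        && endpoint_url.scheme() == "http"
    {
        return Err(format!(
            "Invalid endpoint: {endpoint}. HTTP is not allowed for S3 endpoints. \
             To allow HTTP, set 'aws.allow_http' to true"
        ));
    }
    Ok(())
}

fn main() {
    assert!(validate_endpoint("http://localhost:9000", None).is_err());
    assert!(validate_endpoint("http://localhost:9000", Some(true)).is_ok());
    assert!(validate_endpoint("https://s3.amazonaws.com/bucket", None).is_ok());
    println!("ok");
}
```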
let application_credentials_path = "fake_application_credentials_path"; let location = "gcs://bucket/path/file.parquet"; let table_url = ListingTableUrl::parse(location)?; let scheme = table_url.scheme(); - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_path' '{service_account_path}', 'gcp.service_account_key' '{service_account_key}', 'gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'"); + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_path' '{service_account_path}', 'gcp.service_account_key' '{service_account_key}', 'gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'" + ); let ctx = SessionContext::new(); ctx.register_table_options_extension_from_scheme(scheme); diff --git a/datafusion-cli/src/object_storage/instrumented.rs b/datafusion-cli/src/object_storage/instrumented.rs index c4b63b417fe42..0d5e9dc2c5a84 100644 --- a/datafusion-cli/src/object_storage/instrumented.rs +++ b/datafusion-cli/src/object_storage/instrumented.rs @@ -20,8 +20,8 @@ use std::{ ops::AddAssign, str::FromStr, sync::{ - atomic::{AtomicU8, Ordering}, Arc, + atomic::{AtomicU8, Ordering}, }, time::Duration, }; @@ -31,18 +31,71 @@ use arrow::util::pretty::pretty_format_batches; use async_trait::async_trait; use chrono::Utc; use datafusion::{ - common::{instant::Instant, HashMap}, + common::{HashMap, instant::Instant}, error::DataFusionError, execution::object_store::{DefaultObjectStoreRegistry, ObjectStoreRegistry}, }; -use futures::stream::BoxStream; +use futures::stream::{BoxStream, Stream}; use object_store::{ - path::Path, GetOptions, GetRange, GetResult, ListResult, MultipartUpload, ObjectMeta, + GetOptions, GetRange, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + path::Path, }; use parking_lot::{Mutex, RwLock}; use url::Url; +/// A stream wrapper that measures the time until the first response(item or end of stream) is yielded +struct TimeToFirstItemStream { + inner: S, + start: Instant, + request_index: usize, + requests: Arc>>, + first_item_yielded: bool, +} + +impl TimeToFirstItemStream { + fn new( + inner: S, + start: Instant, + request_index: usize, + requests: Arc>>, + ) -> Self { + Self { + inner, + start, + request_index, + requests, + first_item_yielded: false, + } + } +} + +impl Stream for TimeToFirstItemStream +where + S: Stream> + Unpin, +{ + type Item = Result; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + let poll_result = std::pin::Pin::new(&mut self.inner).poll_next(cx); + + if !self.first_item_yielded && poll_result.is_ready() { + self.first_item_yielded = true; + let elapsed = self.start.elapsed(); + + let mut requests = self.requests.lock(); + if let Some(request) = requests.get_mut(self.request_index) { + request.duration = Some(elapsed); + } + } + + poll_result + } +} + /// The profiling mode to use for an [`InstrumentedObjectStore`] instance. Collecting profiling /// data will have a small negative impact on both CPU and memory usage. 
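The `TimeToFirstItemStream` wrapper above is DataFusion-specific (it writes the elapsed time back into the shared request log), but the underlying pattern is generic. A stripped-down sketch with hypothetical names, using only `futures`, `tokio`, and `std`, that records how long the wrapped stream took to yield its first item or end:

```rust
use std::pin::Pin;
use std::task::{Context, Poll};
use std::time::{Duration, Instant};

use futures::{Stream, StreamExt};

/// Wraps a stream and records the time until the first item (or end of stream).
struct TimeToFirstItem<S> {
    inner: S,
    start: Instant,
    first: Option<Duration>,
}

impl<S> TimeToFirstItem<S> {
    fn new(inner: S) -> Self {
        Self { inner, start: Instant::now(), first: None }
    }
}

impl<S: Stream + Unpin> Stream for TimeToFirstItem<S> {
    type Item = S::Item;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let poll = Pin::new(&mut self.inner).poll_next(cx);
        if self.first.is_none() && poll.is_ready() {
            // First response: either an item or the end of the stream.
            self.first = Some(self.start.elapsed());
        }
        poll
    }
}

#[tokio::main]
async fn main() {
    let mut stream = TimeToFirstItem::new(futures::stream::iter(0..3));
    while let Some(item) = stream.next().await {
        println!("got {item}");
    }
    println!("time to first item: {:?}", stream.first);
}
```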
Default is `Disabled` #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] @@ -91,7 +144,7 @@ impl From for InstrumentedObjectStoreMode { pub struct InstrumentedObjectStore { inner: Arc, instrument_mode: AtomicU8, - requests: Mutex>, + requests: Arc>>, } impl InstrumentedObjectStore { @@ -100,7 +153,7 @@ impl InstrumentedObjectStore { Self { inner: object_store, instrument_mode, - requests: Mutex::new(Vec::new()), + requests: Arc::new(Mutex::new(Vec::new())), } } @@ -218,19 +271,31 @@ impl InstrumentedObjectStore { prefix: Option<&Path>, ) -> BoxStream<'static, Result> { let timestamp = Utc::now(); - let ret = self.inner.list(prefix); + let start = Instant::now(); + let inner_stream = self.inner.list(prefix); + + let request_index = { + let mut requests = self.requests.lock(); + requests.push(RequestDetails { + op: Operation::List, + path: prefix.cloned().unwrap_or_else(|| Path::from("")), + timestamp, + duration: None, + size: None, + range: None, + extra_display: None, + }); + requests.len() - 1 + }; - self.requests.lock().push(RequestDetails { - op: Operation::List, - path: prefix.cloned().unwrap_or_else(|| Path::from("")), - timestamp, - duration: None, // list returns a stream, so the duration isn't meaningful - size: None, - range: None, - extra_display: None, - }); + let wrapped_stream = TimeToFirstItemStream::new( + inner_stream, + start, + request_index, + Arc::clone(&self.requests), + ); - ret + Box::pin(wrapped_stream) } async fn instrumented_list_with_delimiter( @@ -758,6 +823,7 @@ impl ObjectStoreRegistry for InstrumentedObjectStoreRegistry { #[cfg(test)] mod tests { + use futures::StreamExt; use object_store::WriteMultipart; use super::*; @@ -782,9 +848,11 @@ mod tests { "TRaCe".parse().unwrap(), InstrumentedObjectStoreMode::Trace )); - assert!("does_not_exist" - .parse::() - .is_err()); + assert!( + "does_not_exist" + .parse::() + .is_err() + ); assert!(matches!(0.into(), InstrumentedObjectStoreMode::Disabled)); assert!(matches!(1.into(), InstrumentedObjectStoreMode::Summary)); @@ -896,13 +964,15 @@ mod tests { instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); assert!(instrumented.requests.lock().is_empty()); - let _ = instrumented.list(Some(&path)); + let mut stream = instrumented.list(Some(&path)); + // Consume at least one item from the stream to trigger duration measurement + let _ = stream.next().await; assert_eq!(instrumented.requests.lock().len(), 1); let request = instrumented.take_requests().pop().unwrap(); assert_eq!(request.op, Operation::List); assert_eq!(request.path, path); - assert!(request.duration.is_none()); + assert!(request.duration.is_some()); assert!(request.size.is_none()); assert!(request.range.is_none()); assert!(request.extra_display.is_none()); diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 56bdb15a315d9..cfb8a32ffcfeb 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -247,12 +247,12 @@ mod tests { .with_schema(three_column_schema()) .with_batches(vec![]) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+---+---+ | a | b | c | +---+---+---+ +---+---+---+ - "#); + "); } #[test] @@ -262,11 +262,11 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::No) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" 1,4,7 2,5,8 3,6,9 - "#); + "); } #[test] @@ -276,12 +276,12 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::Yes) .run(); - 
assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" a,b,c 1,4,7 2,5,8 3,6,9 - "#); + "); } #[test] @@ -291,10 +291,10 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::No) .run(); - assert_snapshot!(output, @" - 1\t4\t7 - 2\t5\t8 - 3\t6\t9 + assert_snapshot!(output, @r" + 1 4 7 + 2 5 8 + 3 6 9 ") } @@ -305,11 +305,11 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::Yes) .run(); - assert_snapshot!(output, @" - a\tb\tc - 1\t4\t7 - 2\t5\t8 - 3\t6\t9 + assert_snapshot!(output, @r" + a b c + 1 4 7 + 2 5 8 + 3 6 9 "); } @@ -320,7 +320,7 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::Ignored) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+---+---+ | a | b | c | +---+---+---+ @@ -328,7 +328,7 @@ mod tests { | 2 | 5 | 8 | | 3 | 6 | 9 | +---+---+---+ - "#); + "); } #[test] fn print_json() { @@ -337,9 +337,7 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::Ignored) .run(); - assert_snapshot!(output, @r#" - [{"a":1,"b":4,"c":7},{"a":2,"b":5,"c":8},{"a":3,"b":6,"c":9}] - "#); + assert_snapshot!(output, @r#"[{"a":1,"b":4,"c":7},{"a":2,"b":5,"c":8},{"a":3,"b":6,"c":9}]"#); } #[test] @@ -363,11 +361,11 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::No) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" 1,4,7 2,5,8 3,6,9 - "#); + "); } #[test] fn print_automatic_with_header() { @@ -376,12 +374,12 @@ mod tests { .with_batches(split_batch(three_column_batch())) .with_header(WithHeader::Yes) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" a,b,c 1,4,7 2,5,8 3,6,9 - "#); + "); } #[test] @@ -396,7 +394,7 @@ mod tests { .with_maxrows(max_rows) .run(); allow_duplicates! { - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+ | a | +---+ @@ -404,7 +402,7 @@ mod tests { | 2 | | 3 | +---+ - "#); + "); } } } @@ -416,7 +414,7 @@ mod tests { .with_batches(vec![one_column_batch()]) .with_maxrows(MaxRows::Limited(1)) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+ | a | +---+ @@ -425,7 +423,7 @@ mod tests { | . | | . | +---+ - "#); + "); } #[test] @@ -439,7 +437,7 @@ mod tests { ]) .with_maxrows(MaxRows::Limited(5)) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+ | a | +---+ @@ -452,7 +450,7 @@ mod tests { | . | | . 
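Most of the `insta` snapshot churn in this file is just dropping unnecessary `#` delimiters: `r"..."` suffices whenever the snapshot contains no double quotes. A small reminder of the difference, not tied to any particular test above:

```rust
fn main() {
    // `r"..."` cannot contain a `"`; `r#"..."#` can, which is why the JSON
    // snapshot keeps its `#` delimiters while the table snapshots drop them.
    let table = r"
    +---+---+---+
    | a | b | c |
    +---+---+---+";
    let json = r#"[{"a":1,"b":4,"c":7}]"#;
    assert!(table.contains("| a | b | c |"));
    assert!(json.starts_with('['));
    println!("ok");
}
```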
| +---+ - "#); + "); } #[test] @@ -464,7 +462,7 @@ mod tests { .with_format(PrintFormat::Table) .with_batches(vec![empty_batch.clone(), batch, empty_batch]) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+ | a | +---+ @@ -472,7 +470,7 @@ mod tests { | 2 | | 3 | +---+ - "#); + "); } #[test] @@ -486,12 +484,12 @@ mod tests { .with_batches(vec![empty_batch]) .with_header(WithHeader::Yes) .run(); - assert_snapshot!(output, @r#" + assert_snapshot!(output, @r" +---+ | a | +---+ +---+ - "#); + "); // No output for empty batch when schema contains no columns let empty_batch = RecordBatch::new_empty(Arc::new(Schema::empty())); diff --git a/datafusion-cli/src/print_options.rs b/datafusion-cli/src/print_options.rs index 93d1d450fd82b..5fbe27d805db0 100644 --- a/datafusion-cli/src/print_options.rs +++ b/datafusion-cli/src/print_options.rs @@ -28,8 +28,8 @@ use crate::print_format::PrintFormat; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion::common::instant::Instant; use datafusion::common::DataFusionError; +use datafusion::common::instant::Instant; use datafusion::error::Result; use datafusion::physical_plan::RecordBatchStream; @@ -55,8 +55,10 @@ impl FromStr for MaxRows { Ok(Self::Unlimited) } else { match maxrows.parse::() { - Ok(nrows) => Ok(Self::Limited(nrows)), - _ => Err(format!("Invalid maxrows {maxrows}. Valid inputs are natural numbers or \'none\', \'inf\', or \'infinite\' for no limit.")), + Ok(nrows) => Ok(Self::Limited(nrows)), + _ => Err(format!( + "Invalid maxrows {maxrows}. Valid inputs are natural numbers or \'none\', \'inf\', or \'infinite\' for no limit." + )), } } } diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index c1395aa4f562c..d6f8deedfe32c 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -20,7 +20,7 @@ use std::process::Command; use rstest::rstest; use async_trait::async_trait; -use insta::{glob, Settings}; +use insta::{Settings, glob}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; use std::path::PathBuf; use std::{env, fs}; @@ -111,7 +111,9 @@ async fn setup_minio_container() -> ContainerAsync { } Err(TestcontainersError::Client(e)) => { - panic!("Failed to start MinIO container. Ensure Docker is running and accessible: {e}"); + panic!( + "Failed to start MinIO container. 
Ensure Docker is running and accessible: {e}" + ); } Err(e) => { panic!("Failed to start MinIO container: {e}"); @@ -258,13 +260,15 @@ async fn test_cli() { glob!("sql/integration/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); - assert_cmd_snapshot!(cli() - .env_clear() - .env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin") - .env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword") - .env("AWS_ENDPOINT", format!("http://localhost:{port}")) - .env("AWS_ALLOW_HTTP", "true") - .pass_stdin(input)) + assert_cmd_snapshot!( + cli() + .env_clear() + .env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin") + .env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword") + .env("AWS_ENDPOINT", format!("http://localhost:{port}")) + .env("AWS_ALLOW_HTTP", "true") + .pass_stdin(input) + ) }); } @@ -328,10 +332,12 @@ SELECT COUNT(*) FROM hits; "# ); - assert_cmd_snapshot!(cli() - .env("RUST_LOG", "warn") - .env_remove("AWS_ENDPOINT") - .pass_stdin(input)); + assert_cmd_snapshot!( + cli() + .env("RUST_LOG", "warn") + .env_remove("AWS_ENDPOINT") + .pass_stdin(input) + ); } /// Ensure backtrace will be printed, if executing `datafusion-cli` with a query @@ -450,7 +456,7 @@ SELECT * from CARS LIMIT 1; #[async_trait] trait MinioCommandExt { async fn with_minio(&mut self, container: &ContainerAsync) - -> &mut Self; + -> &mut Self; } #[async_trait] diff --git a/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap index 6b3a247dd7b82..1359cefbe71c7 100644 --- a/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap +++ b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap @@ -7,7 +7,6 @@ info: - EXPLAIN SELECT 123 env: DATAFUSION_EXPLAIN_FORMAT: pgjson -snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_format@automatic.snap b/datafusion-cli/tests/snapshots/cli_format@automatic.snap index 2591f493e90a8..76b14d9a3a924 100644 --- a/datafusion-cli/tests/snapshots/cli_format@automatic.snap +++ b/datafusion-cli/tests/snapshots/cli_format@automatic.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_format@csv.snap b/datafusion-cli/tests/snapshots/cli_format@csv.snap index c41b042298eb0..2c969bd91d121 100644 --- a/datafusion-cli/tests/snapshots/cli_format@csv.snap +++ b/datafusion-cli/tests/snapshots/cli_format@csv.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_format@json.snap b/datafusion-cli/tests/snapshots/cli_format@json.snap index 8f804a337cce5..22a9cc4657a91 100644 --- a/datafusion-cli/tests/snapshots/cli_format@json.snap +++ b/datafusion-cli/tests/snapshots/cli_format@json.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_format@nd-json.snap b/datafusion-cli/tests/snapshots/cli_format@nd-json.snap index 7b4ce1e2530cf..513bcb7372ca6 100644 --- a/datafusion-cli/tests/snapshots/cli_format@nd-json.snap +++ b/datafusion-cli/tests/snapshots/cli_format@nd-json.snap @@ -1,5 +1,5 @@ --- 
-source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_format@table.snap b/datafusion-cli/tests/snapshots/cli_format@table.snap index 99914182462aa..8677847588385 100644 --- a/datafusion-cli/tests/snapshots/cli_format@table.snap +++ b/datafusion-cli/tests/snapshots/cli_format@table.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_format@tsv.snap b/datafusion-cli/tests/snapshots/cli_format@tsv.snap index 968268c31dd55..c56e60fcab155 100644 --- a/datafusion-cli/tests/snapshots/cli_format@tsv.snap +++ b/datafusion-cli/tests/snapshots/cli_format@tsv.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap index c27d527df0b6a..9fd07fa6f4e1b 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap b/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap index 46ee6be64f624..8620f6da84488 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap @@ -5,7 +5,6 @@ info: args: - "--command" - EXPLAIN SELECT 123 -snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap index 7c44e41729a17..df3a10b6bb54b 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap index 3b975bb6a927d..a394458768d1b 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 38f1f8b0e0cad..b0190dadf3c3f 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -35,18 +35,6 @@ rust-version = { workspace = true } [lints] workspace = true -[[example]] -name = "dataframe_to_s3" -path = "examples/external_dependency/dataframe-to-s3.rs" - -[[example]] -name = "query_aws_s3" -path = "examples/external_dependency/query-aws-s3.rs" - -[[example]] -name = "custom_file_casts" -path = "examples/custom_file_casts.rs" - [dev-dependencies] arrow = { workspace = true } # arrow_schema is required for record_batch! 
macro :sad: @@ -58,17 +46,22 @@ dashmap = { workspace = true } # note only use main datafusion crate for examples base64 = "0.22.1" datafusion = { workspace = true, default-features = true, features = ["parquet_encryption"] } -datafusion-ffi = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } datafusion-physical-expr-adapter = { workspace = true } datafusion-proto = { workspace = true } +datafusion-sql = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } +insta = { workspace = true } log = { workspace = true } mimalloc = { version = "0.1", default-features = false } object_store = { workspace = true, features = ["aws", "http"] } prost = { workspace = true } rand = { workspace = true } serde_json = { workspace = true } +strum = { workspace = true } +strum_macros = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } @@ -76,7 +69,7 @@ tonic = "0.14" tracing = { version = "0.1" } tracing-subscriber = { version = "0.3" } url = { workspace = true } -uuid = "1.18" +uuid = "1.19" [target.'cfg(not(target_os = "windows"))'.dev-dependencies] nix = { version = "0.30.1", features = ["fs"] } diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index 1befba6be66fd..8f38b38990363 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -39,60 +39,178 @@ git submodule update --init # Change to the examples directory cd datafusion-examples/examples -# Run the `dataframe` example: -# ... use the equivalent for other examples -cargo run --example dataframe +# Run all examples in a group +cargo run --example -- all + +# Run a specific example within a group +cargo run --example -- + +# Run all examples in the `dataframe` group +cargo run --example dataframe -- all + +# Run a single example from the `dataframe` group +# (apply the same pattern for any other group) +cargo run --example dataframe -- dataframe ``` -## Single Process - -- [`examples/udf/advanced_udaf.rs`](examples/udf/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF) -- [`examples/udf/advanced_udf.rs`](examples/udf/advanced_udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF) -- [`examples/udf/advanced_udwf.rs`](examples/udf/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) -- [`advanced_parquet_index.rs`](examples/advanced_parquet_index.rs): Creates a detailed secondary index that covers the contents of several parquet files -- [`examples/udf/async_udf.rs`](examples/udf/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF) -- [`analyzer_rule.rs`](examples/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control) -- [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog -- [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization -- [`csv_sql_streaming.rs`](examples/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file -- [`csv_json_opener.rs`](examples/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es -- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource 
(TableProvider) -- [`custom_file_casts.rs`](examples/custom_file_casts.rs): Implement custom casting rules to adapt file schemas -- [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format -- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3 -- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file. -- [`examples/builtin_functions/date_time`](examples/builtin_functions/date_time.rs): Examples of date-time related functions and queries -- [`default_column_values.rs`](examples/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter -- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs -- [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s -- [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks. -- [`flight/sql_server.rs`](examples/flight/sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from Flight and and FlightSQL (e.g. JDBC) clients -- [`examples/builtin_functions/function_factory.rs`](examples/builtin_functions/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros -- [`memory_pool_tracking.rs`](examples/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages -- [`memory_pool_execution_plan.rs`](examples/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling -- [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates -- [`parquet_embedded_index.rs`](examples/parquet_embedded_index.rs): Store a custom index inside a Parquet file and use it to speed up queries -- [`parquet_encrypted.rs`](examples/parquet_encrypted.rs): Read and write encrypted Parquet files using DataFusion -- [`parquet_encrypted_with_kms.rs`](examples/parquet_encrypted_with_kms.rs): Read and write encrypted Parquet files using an encryption factory -- [`parquet_index.rs`](examples/parquet_index.rs): Create an secondary index over several parquet files and use it to speed up queries -- [`parquet_exec_visitor.rs`](examples/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution -- [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`. 
-- [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan` -- [`planner_api.rs`](examples/planner_api.rs) APIs to manipulate logical and physical plans -- [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics -- [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 -- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP -- [`examples/builtin_functions/regexp.rs`](examples/builtin_functions/regexp.rs): Examples of using regular expression functions -- [`remote_catalog.rs`](examples/regexp.rs): Examples of interfacing with a remote catalog (e.g. over a network) -- [`examples/udf/simple_udaf.rs`](examples/udf/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) -- [`examples/udf/simple_udf.rs`](examples/udf/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF) -- [`examples/udf/simple_udtf.rs`](examples/udf/simple_udtf.rs): Define and invoke a User Defined Table Function (UDTF) -- [`examples/udf/simple_udfw.rs`](examples/udf/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) -- [`sql_analysis.rs`](examples/sql_analysis.rs): Analyse SQL queries with DataFusion structures -- [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings -- [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser` -- [`sql_query.rs`](examples/memtable.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files) - -## Distributed - -- [`examples/flight/client.rs`](examples/flight/client.rs) and [`examples/flight/server.rs`](examples/flight/server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Arrow Flight protocol. 
+## Builtin Functions Examples + +### Group: `builtin_functions` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| ---------------- | ----------------------------------------------------------------------------------------- | ---------------------------------------------------------- | +| date_time | [`builtin_functions/date_time.rs`](examples/builtin_functions/date_time.rs) | Examples of date-time related functions and queries | +| function_factory | [`builtin_functions/function_factory.rs`](examples/builtin_functions/function_factory.rs) | Register `CREATE FUNCTION` handler to implement SQL macros | +| regexp | [`builtin_functions/regexp.rs`](examples/builtin_functions/regexp.rs) | Examples of using regular expression functions | + +## Custom Data Source Examples + +### Group: `custom_data_source` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| --------------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------------- | +| csv_sql_streaming | [`custom_data_source/csv_sql_streaming.rs`](examples/custom_data_source/csv_sql_streaming.rs) | Run a streaming SQL query against CSV data | +| csv_json_opener | [`custom_data_source/csv_json_opener.rs`](examples/custom_data_source/csv_json_opener.rs) | Use low-level FileOpener APIs for CSV/JSON | +| custom_datasource | [`custom_data_source/custom_datasource.rs`](examples/custom_data_source/custom_datasource.rs) | Query a custom TableProvider | +| custom_file_casts | [`custom_data_source/custom_file_casts.rs`](examples/custom_data_source/custom_file_casts.rs) | Implement custom casting rules | +| custom_file_format | [`custom_data_source/custom_file_format.rs`](examples/custom_data_source/custom_file_format.rs) | Write to a custom file format | +| default_column_values | [`custom_data_source/default_column_values.rs`](examples/custom_data_source/default_column_values.rs) | Custom default values using metadata | +| file_stream_provider | [`custom_data_source/file_stream_provider.rs`](examples/custom_data_source/file_stream_provider.rs) | Read/write via FileStreamProvider for streams | + +## Data IO Examples + +### Group: `data_io` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| -------------------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------ | +| catalog | [`data_io/catalog.rs`](examples/data_io/catalog.rs) | Register tables into a custom catalog | +| json_shredding | [`data_io/json_shredding.rs`](examples/data_io/json_shredding.rs) | Implement filter rewriting for JSON shredding | +| parquet_adv_idx | [`data_io/parquet_advanced_index.rs`](examples/data_io/parquet_advanced_index.rs) | Create a secondary index across multiple parquet files | +| parquet_emb_idx | [`data_io/parquet_embedded_index.rs`](examples/data_io/parquet_embedded_index.rs) | Store a custom index inside Parquet files | +| parquet_enc | [`data_io/parquet_encrypted.rs`](examples/data_io/parquet_encrypted.rs) | Read & write encrypted Parquet files | +| parquet_enc_with_kms | [`data_io/parquet_encrypted_with_kms.rs`](examples/data_io/parquet_encrypted_with_kms.rs) | Encrypted Parquet I/O using a KMS-backed factory | +| parquet_exec_visitor | [`data_io/parquet_exec_visitor.rs`](examples/data_io/parquet_exec_visitor.rs) | Extract statistics by visiting an ExecutionPlan | +| parquet_idx | 
[`data_io/parquet_index.rs`](examples/data_io/parquet_index.rs) | Create a secondary index | +| query_http_csv | [`data_io/query_http_csv.rs`](examples/data_io/query_http_csv.rs) | Query CSV files via HTTP | +| remote_catalog | [`data_io/remote_catalog.rs`](examples/data_io/remote_catalog.rs) | Interact with a remote catalog | + +## DataFrame Examples + +### Group: `dataframe` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| --------------------- | ----------------------------------------------------------------------------------- | ------------------------------------------------------ | +| dataframe | [`dataframe/dataframe.rs`](examples/dataframe/dataframe.rs) | Query DataFrames from various sources and write output | +| deserialize_to_struct | [`dataframe/deserialize_to_struct.rs`](examples/dataframe/deserialize_to_struct.rs) | Convert Arrow arrays into Rust structs | + +## Execution Monitoring Examples + +### Group: `execution_monitoring` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| ------------------ | ------------------------------------------------------------------------------------------------------------------- | ---------------------------------------- | +| mem_pool_exec_plan | [`execution_monitoring/memory_pool_execution_plan.rs`](examples/execution_monitoring/memory_pool_execution_plan.rs) | Memory-aware ExecutionPlan with spilling | +| mem_pool_tracking | [`execution_monitoring/memory_pool_tracking.rs`](examples/execution_monitoring/memory_pool_tracking.rs) | Demonstrates memory tracking | +| tracing | [`execution_monitoring/tracing.rs`](examples/execution_monitoring/tracing.rs) | Demonstrates tracing integration | + +## External Dependency Examples + +### Group: `external_dependency` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| --------------- | ------------------------------------------------------------------------------------------- | ---------------------------------------- | +| dataframe_to_s3 | [`external_dependency/dataframe_to_s3.rs`](examples/external_dependency/dataframe_to_s3.rs) | Query DataFrames and write results to S3 | +| query_aws_s3 | [`external_dependency/query_aws_s3.rs`](examples/external_dependency/query_aws_s3.rs) | Query S3-backed data using object_store | + +## Flight Examples + +### Group: `flight` + +#### Category: Distributed + +| Subcommand | File Path | Description | +| ---------- | ------------------------------------------------------- | ------------------------------------------------------ | +| server | [`flight/server.rs`](examples/flight/server.rs) | Run DataFusion server accepting FlightSQL/JDBC queries | +| client | [`flight/client.rs`](examples/flight/client.rs) | Execute SQL queries via Arrow Flight protocol | +| sql_server | [`flight/sql_server.rs`](examples/flight/sql_server.rs) | Standalone SQL server for JDBC clients | + +## Proto Examples + +### Group: `proto` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| ------------------------ | --------------------------------------------------------------------------------- | --------------------------------------------------------------- | +| composed_extension_codec | [`proto/composed_extension_codec.rs`](examples/proto/composed_extension_codec.rs) | Use multiple extension codecs for serialization/deserialization | + +## Query Planning Examples + +### Group: `query_planning` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| 
-------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------ | +| analyzer_rule | [`query_planning/analyzer_rule.rs`](examples/query_planning/analyzer_rule.rs) | Custom AnalyzerRule to change query semantics | +| expr_api | [`query_planning/expr_api.rs`](examples/query_planning/expr_api.rs) | Create, execute, analyze, and coerce Exprs | +| optimizer_rule | [`query_planning/optimizer_rule.rs`](examples/query_planning/optimizer_rule.rs) | Replace predicates via a custom OptimizerRule | +| parse_sql_expr | [`query_planning/parse_sql_expr.rs`](examples/query_planning/parse_sql_expr.rs) | Parse SQL into DataFusion Expr | +| plan_to_sql | [`query_planning/plan_to_sql.rs`](examples/query_planning/plan_to_sql.rs) | Generate SQL from expressions or plans | +| planner_api | [`query_planning/planner_api.rs`](examples/query_planning/planner_api.rs) | APIs for logical and physical plan manipulation | +| pruning | [`query_planning/pruning.rs`](examples/query_planning/pruning.rs) | Use pruning to skip irrelevant files | +| thread_pools | [`query_planning/thread_pools.rs`](examples/query_planning/thread_pools.rs) | Configure custom thread pools for DataFusion execution | + +## Relation Planner Examples + +### Group: `relation_planner` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| --------------- | ------------------------------------------------------------------------------------- | ------------------------------------------ | +| match_recognize | [`relation_planner/match_recognize.rs`](examples/relation_planner/match_recognize.rs) | Implement MATCH_RECOGNIZE pattern matching | +| pivot_unpivot | [`relation_planner/pivot_unpivot.rs`](examples/relation_planner/pivot_unpivot.rs) | Implement PIVOT / UNPIVOT | +| table_sample | [`relation_planner/table_sample.rs`](examples/relation_planner/table_sample.rs) | Implement TABLESAMPLE | + +## SQL Ops Examples + +### Group: `sql_ops` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| ----------------- | ----------------------------------------------------------------------- | -------------------------------------------------- | +| analysis | [`sql_ops/analysis.rs`](examples/sql_ops/analysis.rs) | Analyze SQL queries | +| custom_sql_parser | [`sql_ops/custom_sql_parser.rs`](examples/sql_ops/custom_sql_parser.rs) | Implement a custom SQL parser to extend DataFusion | +| frontend | [`sql_ops/frontend.rs`](examples/sql_ops/frontend.rs) | Build LogicalPlans from SQL | +| query | [`sql_ops/query.rs`](examples/sql_ops/query.rs) | Query data using SQL | + +## UDF Examples + +### Group: `udf` + +#### Category: Single Process + +| Subcommand | File Path | Description | +| ---------- | ------------------------------------------------------- | ----------------------------------------------- | +| adv_udaf | [`udf/advanced_udaf.rs`](examples/udf/advanced_udaf.rs) | Advanced User Defined Aggregate Function (UDAF) | +| adv_udf | [`udf/advanced_udf.rs`](examples/udf/advanced_udf.rs) | Advanced User Defined Scalar Function (UDF) | +| adv_udwf | [`udf/advanced_udwf.rs`](examples/udf/advanced_udwf.rs) | Advanced User Defined Window Function (UDWF) | +| async_udf | [`udf/async_udf.rs`](examples/udf/async_udf.rs) | Asynchronous User Defined Scalar Function | +| udaf | [`udf/simple_udaf.rs`](examples/udf/simple_udaf.rs) | Simple UDAF example | +| udf | [`udf/simple_udf.rs`](examples/udf/simple_udf.rs) | Simple UDF example | +| udtf | 
[`udf/simple_udtf.rs`](examples/udf/simple_udtf.rs) | Simple UDTF example | +| udwf | [`udf/simple_udwf.rs`](examples/udf/simple_udwf.rs) | Simple UDWF example | diff --git a/datafusion-examples/examples/builtin_functions/date_time.rs b/datafusion-examples/examples/builtin_functions/date_time.rs index 178cba979cb95..08d4bc6e29978 100644 --- a/datafusion-examples/examples/builtin_functions/date_time.rs +++ b/datafusion-examples/examples/builtin_functions/date_time.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::sync::Arc; use arrow::array::{Date32Array, Int32Array}; @@ -179,12 +181,13 @@ async fn query_make_date() -> Result<()> { // invalid column values will result in an error let result = ctx - .sql("select make_date(2024, null, 23)") + .sql("select make_date(2024, '', 23)") .await? .collect() .await; - let expected = "Execution error: Unable to parse date from null/empty value"; + let expected = + "Arrow error: Cast error: Cannot cast string '' to value of Int32 type"; assert_contains!(result.unwrap_err().to_string(), expected); // invalid date values will also result in an error @@ -194,7 +197,7 @@ async fn query_make_date() -> Result<()> { .collect() .await; - let expected = "Execution error: Unable to parse date from 2024, 1, 32"; + let expected = "Execution error: Day value '32' is out of range"; assert_contains!(result.unwrap_err().to_string(), expected); Ok(()) diff --git a/datafusion-examples/examples/builtin_functions/function_factory.rs b/datafusion-examples/examples/builtin_functions/function_factory.rs index 5d41e7a260713..7eff0d0b5c484 100644 --- a/datafusion-examples/examples/builtin_functions/function_factory.rs +++ b/datafusion-examples/examples/builtin_functions/function_factory.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::DataType; use datafusion::common::tree_node::{Transformed, TreeNode}; -use datafusion::common::{exec_datafusion_err, exec_err, internal_err, DataFusionError}; +use datafusion::common::{DataFusionError, exec_datafusion_err, exec_err, internal_err}; use datafusion::error::Result; use datafusion::execution::context::{ FunctionFactory, RegisterFunction, SessionContext, SessionState, diff --git a/datafusion-examples/examples/builtin_functions/main.rs b/datafusion-examples/examples/builtin_functions/main.rs index 3399c395bfd62..f9e0a44a09a34 100644 --- a/datafusion-examples/examples/builtin_functions/main.rs +++ b/datafusion-examples/examples/builtin_functions/main.rs @@ -19,7 +19,13 @@ //! //! These examples demonstrate miscellaneous function-related features. //! +//! ## Usage +//! ```bash +//! cargo run --example builtin_functions -- [all|date_time|function_factory|regexp] +//! ``` +//! //! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module //! - `date_time` — examples of date-time related functions and queries //! - `function_factory` — register `CREATE FUNCTION` handler to implement SQL macros //! 
- `regexp` — examples of using regular expression functions @@ -28,46 +34,39 @@ mod date_time; mod function_factory; mod regexp; -use std::str::FromStr; - use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] enum ExampleKind { + All, DateTime, FunctionFactory, Regexp, } -impl AsRef for ExampleKind { - fn as_ref(&self) -> &str { - match self { - Self::DateTime => "date_time", - Self::FunctionFactory => "function_factory", - Self::Regexp => "regexp", - } - } -} - -impl FromStr for ExampleKind { - type Err = DataFusionError; - - fn from_str(s: &str) -> Result { - match s { - "date_time" => Ok(Self::DateTime), - "function_factory" => Ok(Self::FunctionFactory), - "regexp" => Ok(Self::Regexp), - _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), - } - } -} - impl ExampleKind { - const ALL: [Self; 3] = [Self::DateTime, Self::FunctionFactory, Self::Regexp]; - const EXAMPLE_NAME: &str = "builtin_functions"; - fn variants() -> Vec<&'static str> { - Self::ALL.iter().map(|x| x.as_ref()).collect() + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::DateTime => date_time::date_time().await?, + ExampleKind::FunctionFactory => function_factory::function_factory().await?, + ExampleKind::Regexp => regexp::regexp().await?, + } + Ok(()) } } @@ -76,19 +75,14 @@ async fn main() -> Result<()> { let usage = format!( "Usage: cargo run --example {} -- [{}]", ExampleKind::EXAMPLE_NAME, - ExampleKind::variants().join("|") + ExampleKind::VARIANTS.join("|") ); - let arg = std::env::args().nth(1).ok_or_else(|| { - eprintln!("{usage}"); - DataFusionError::Execution("Missing argument".to_string()) - })?; - - match arg.parse::()? { - ExampleKind::DateTime => date_time::date_time().await?, - ExampleKind::FunctionFactory => function_factory::function_factory().await?, - ExampleKind::Regexp => regexp::regexp().await?, - } + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; - Ok(()) + example.run().await } diff --git a/datafusion-examples/examples/builtin_functions/regexp.rs b/datafusion-examples/examples/builtin_functions/regexp.rs index 13c0786930283..e8376cd0c94eb 100644 --- a/datafusion-examples/examples/builtin_functions/regexp.rs +++ b/datafusion-examples/examples/builtin_functions/regexp.rs @@ -16,9 +16,14 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
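The `main.rs` refactor above replaces the hand-written `FromStr` and `AsRef` implementations with `strum` derives and adds an `All` variant that runs every example through a boxed recursive call. A minimal, self-contained sketch of that dispatcher pattern, assuming the `strum`, `strum_macros`, and `tokio` crates (the `Foo` and `Bar` variants are placeholders, not real examples):

```rust
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{Display, EnumIter, EnumString, VariantNames};

#[derive(EnumIter, EnumString, Display, VariantNames)]
#[strum(serialize_all = "snake_case")]
enum ExampleKind {
    All,
    Foo,
    Bar,
}

impl ExampleKind {
    /// Every variant except `All` maps to a concrete example.
    fn runnable() -> impl Iterator<Item = Self> {
        Self::iter().filter(|v| !matches!(v, Self::All))
    }

    async fn run(&self) -> Result<(), String> {
        match self {
            Self::All => {
                for example in Self::runnable() {
                    println!("Running example: {example}");
                    // Box::pin breaks the async recursion (`All` calls `run` again).
                    Box::pin(example.run()).await?;
                }
            }
            Self::Foo => println!("running the (placeholder) foo example"),
            Self::Bar => println!("running the (placeholder) bar example"),
        }
        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // `VARIANTS` comes from the `VariantNames` derive and drives the usage string.
    let usage = format!(
        "Usage: cargo run --example <group> -- [{}]",
        ExampleKind::VARIANTS.join("|")
    );
    let example: ExampleKind = std::env::args()
        .nth(1)
        .ok_or_else(|| format!("Missing argument. {usage}"))?
        .parse()
        .map_err(|_| format!("Unknown example. {usage}"))?;
    example.run().await
}
```

Deriving `EnumString`, `Display`, and `VariantNames` keeps the CLI argument parsing, the error messages, and the usage string in sync with the enum, which is the point of dropping the manual implementations.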
+ +use std::{fs::File, io::Write}; + use datafusion::common::{assert_batches_eq, assert_contains}; use datafusion::error::Result; use datafusion::prelude::*; +use tempfile::tempdir; /// This example demonstrates how to use the regexp_* functions /// @@ -30,12 +35,30 @@ use datafusion::prelude::*; /// https://docs.rs/regex/latest/regex/#grouping-and-flags pub async fn regexp() -> Result<()> { let ctx = SessionContext::new(); - ctx.register_csv( - "examples", - "datafusion/physical-expr/tests/data/regex.csv", - CsvReadOptions::new(), - ) - .await?; + // content from file 'datafusion/physical-expr/tests/data/regex.csv' + let csv_data = r#"values,patterns,replacement,flags +abc,^(a),bb\1bb,i +ABC,^(A).*,B,i +aBc,(b|d),e,i +AbC,(B|D),e, +aBC,^(b|c),d, +4000,\b4([1-9]\d\d|\d[1-9]\d|\d\d[1-9])\b,xyz, +4010,\b4([1-9]\d\d|\d[1-9]\d|\d\d[1-9])\b,xyz, +Düsseldorf,[\p{Letter}-]+,München, +Москва,[\p{L}-]+,Moscow, +Köln,[a-zA-Z]ö[a-zA-Z]{2},Koln, +اليوم,^\p{Arabic}+$,Today,"#; + let dir = tempdir()?; + let file_path = dir.path().join("regex.csv"); + { + let mut file = File::create(&file_path)?; + // write CSV data + file.write_all(csv_data.as_bytes())?; + } // scope closes the file + let file_path = file_path.to_str().unwrap(); + + ctx.register_csv("examples", file_path, CsvReadOptions::new()) + .await?; // // @@ -111,11 +134,11 @@ pub async fn regexp() -> Result<()> { assert_batches_eq!( &[ - "+---------------------------------------------------+----------------------------------------------------+", - "| regexp_like(Utf8(\"John Smith\"),Utf8(\"^.*Smith$\")) | regexp_like(Utf8(\"Smith Jones\"),Utf8(\"^Smith.*$\")) |", - "+---------------------------------------------------+----------------------------------------------------+", - "| true | true |", - "+---------------------------------------------------+----------------------------------------------------+", + "+---------------------------------------------------+----------------------------------------------------+", + "| regexp_like(Utf8(\"John Smith\"),Utf8(\"^.*Smith$\")) | regexp_like(Utf8(\"Smith Jones\"),Utf8(\"^Smith.*$\")) |", + "+---------------------------------------------------+----------------------------------------------------+", + "| true | true |", + "+---------------------------------------------------+----------------------------------------------------+", ], &result ); @@ -241,11 +264,11 @@ pub async fn regexp() -> Result<()> { assert_batches_eq!( &[ - "+----------------------------------------------------+-----------------------------------------------------+", - "| regexp_match(Utf8(\"John Smith\"),Utf8(\"^.*Smith$\")) | regexp_match(Utf8(\"Smith Jones\"),Utf8(\"^Smith.*$\")) |", - "+----------------------------------------------------+-----------------------------------------------------+", - "| [John Smith] | [Smith Jones] |", - "+----------------------------------------------------+-----------------------------------------------------+", + "+----------------------------------------------------+-----------------------------------------------------+", + "| regexp_match(Utf8(\"John Smith\"),Utf8(\"^.*Smith$\")) | regexp_match(Utf8(\"Smith Jones\"),Utf8(\"^Smith.*$\")) |", + "+----------------------------------------------------+-----------------------------------------------------+", + "| [John Smith] | [Smith Jones] |", + "+----------------------------------------------------+-----------------------------------------------------+", ], &result ); @@ -267,21 +290,21 @@ pub async fn regexp() -> Result<()> { assert_batches_eq!( &[ 
- "+---------------------------------------------------------------------------------------------------------+", - "| regexp_replace(examples.values,examples.patterns,examples.replacement,concat(Utf8(\"g\"),examples.flags)) |", - "+---------------------------------------------------------------------------------------------------------+", - "| bbabbbc |", - "| B |", - "| aec |", - "| AbC |", - "| aBC |", - "| 4000 |", - "| xyz |", - "| München |", - "| Moscow |", - "| Koln |", - "| Today |", - "+---------------------------------------------------------------------------------------------------------+", + "+---------------------------------------------------------------------------------------------------------+", + "| regexp_replace(examples.values,examples.patterns,examples.replacement,concat(Utf8(\"g\"),examples.flags)) |", + "+---------------------------------------------------------------------------------------------------------+", + "| bbabbbc |", + "| B |", + "| aec |", + "| AbC |", + "| aBC |", + "| 4000 |", + "| xyz |", + "| München |", + "| Moscow |", + "| Koln |", + "| Today |", + "+---------------------------------------------------------------------------------------------------------+", ], &result ); @@ -295,11 +318,11 @@ pub async fn regexp() -> Result<()> { assert_batches_eq!( &[ - "+------------------------------------------------------------------------+", - "| regexp_replace(Utf8(\"foobarbaz\"),Utf8(\"b(..)\"),Utf8(\"X\\1Y\"),Utf8(\"g\")) |", - "+------------------------------------------------------------------------+", - "| fooXarYXazY |", - "+------------------------------------------------------------------------+", + "+------------------------------------------------------------------------+", + "| regexp_replace(Utf8(\"foobarbaz\"),Utf8(\"b(..)\"),Utf8(\"X\\1Y\"),Utf8(\"g\")) |", + "+------------------------------------------------------------------------+", + "| fooXarYXazY |", + "+------------------------------------------------------------------------+", ], &result ); diff --git a/datafusion-examples/examples/csv_json_opener.rs b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs similarity index 80% rename from datafusion-examples/examples/csv_json_opener.rs rename to datafusion-examples/examples/custom_data_source/csv_json_opener.rs index ef2a3eaca0c88..7b2e321362632 100644 --- a/datafusion-examples/examples/csv_json_opener.rs +++ b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::common::config::CsvOptions; use datafusion::{ assert_batches_eq, datasource::{ @@ -31,18 +34,15 @@ use datafusion::{ test_util::aggr_test_schema, }; -use datafusion::datasource::{ - physical_plan::FileScanConfigBuilder, table_schema::TableSchema, -}; +use datafusion::datasource::physical_plan::FileScanConfigBuilder; use futures::StreamExt; -use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; +use object_store::{ObjectStore, local::LocalFileSystem, memory::InMemory}; /// This example demonstrates using the low level [`FileStream`] / [`FileOpener`] APIs to directly /// read data from (CSV/JSON) into Arrow RecordBatches. 
/// /// If you want to query data in CSV or JSON files, see the [`dataframe.rs`] and [`sql_query.rs`] examples -#[tokio::main] -async fn main() -> Result<()> { +pub async fn csv_json_opener() -> Result<()> { csv_opener().await?; json_opener().await?; Ok(()) @@ -57,23 +57,29 @@ async fn csv_opener() -> Result<()> { let path = std::path::Path::new(&path).canonicalize()?; - let scan_config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - Arc::clone(&schema), - Arc::new(CsvSource::default()), - ) - .with_projection_indices(Some(vec![12, 0])) - .with_limit(Some(5)) - .with_file(PartitionedFile::new(path.display().to_string(), 10)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; - let config = CsvSource::new(true, b',', b'"') + let source = CsvSource::new(Arc::clone(&schema)) + .with_csv_options(options) .with_comment(Some(b'#')) - .with_schema(TableSchema::from_file_schema(schema)) - .with_batch_size(8192) - .with_projection(&scan_config); + .with_batch_size(8192); + + let scan_config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_projection_indices(Some(vec![12, 0]))? + .with_limit(Some(5)) + .with_file(PartitionedFile::new(path.display().to_string(), 10)) + .build(); - let opener = config.create_file_opener(object_store, &scan_config, 0); + let opener = + scan_config + .file_source() + .create_file_opener(object_store, &scan_config, 0)?; let mut result = vec![]; let mut stream = @@ -125,10 +131,9 @@ async fn json_opener() -> Result<()> { let scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - schema, - Arc::new(JsonSource::default()), + Arc::new(JsonSource::new(schema)), ) - .with_projection_indices(Some(vec![1, 0])) + .with_projection_indices(Some(vec![1, 0]))? .with_limit(Some(5)) .with_file(PartitionedFile::new(path.to_string(), 10)) .build(); diff --git a/datafusion-examples/examples/csv_sql_streaming.rs b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs similarity index 96% rename from datafusion-examples/examples/csv_sql_streaming.rs rename to datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs index 99264bbcb486d..554382ea9549e 100644 --- a/datafusion-examples/examples/csv_sql_streaming.rs +++ b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs @@ -15,14 +15,15 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
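The `regexp` example above now inlines its CSV fixture and writes it to a temporary directory instead of reading a file checked into the repository. A standalone sketch of that pattern, assuming only the `datafusion`, `tempfile`, and `tokio` crates (the table name and CSV contents are illustrative):

```rust
use std::{fs::File, io::Write};

use datafusion::error::Result;
use datafusion::prelude::*;
use tempfile::tempdir;

#[tokio::main]
async fn main() -> Result<()> {
    // Inline fixture data instead of depending on a file inside the repository.
    let csv_data = "values,patterns\nabc,^(a)\nABC,^(A).*\n";

    let dir = tempdir()?;
    let file_path = dir.path().join("regex.csv");
    {
        // The scope closes (and flushes) the file before DataFusion reads it.
        let mut file = File::create(&file_path)?;
        file.write_all(csv_data.as_bytes())?;
    }

    let ctx = SessionContext::new();
    ctx.register_csv("examples", file_path.to_str().unwrap(), CsvReadOptions::new())
        .await?;
    ctx.sql("SELECT * FROM examples").await?.show().await?;
    Ok(())
}
```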
+ use datafusion::common::test_util::datafusion_test_data; use datafusion::error::Result; use datafusion::prelude::*; /// This example demonstrates executing a simple query against an Arrow data source (CSV) and /// fetching results with streaming aggregation and streaming window -#[tokio::main] -async fn main() -> Result<()> { +pub async fn csv_sql_streaming() -> Result<()> { // create local execution context let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_data_source/custom_datasource.rs similarity index 95% rename from datafusion-examples/examples/custom_datasource.rs rename to datafusion-examples/examples/custom_data_source/custom_datasource.rs index bc865fac5a338..b276ae32cf247 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_data_source/custom_datasource.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::collections::{BTreeMap, HashMap}; use std::fmt::{self, Debug, Formatter}; @@ -22,10 +24,10 @@ use std::sync::{Arc, Mutex}; use std::time::Duration; use async_trait::async_trait; -use datafusion::arrow::array::{UInt64Builder, UInt8Builder}; +use datafusion::arrow::array::{UInt8Builder, UInt64Builder}; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::datasource::{provider_as_source, TableProvider, TableType}; +use datafusion::datasource::{TableProvider, TableType, provider_as_source}; use datafusion::error::Result; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::LogicalPlanBuilder; @@ -33,8 +35,8 @@ use datafusion::physical_expr::EquivalenceProperties; use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::memory::MemoryStream; use datafusion::physical_plan::{ - project_schema, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + SendableRecordBatchStream, project_schema, }; use datafusion::prelude::*; @@ -42,8 +44,7 @@ use datafusion::catalog::Session; use tokio::time::timeout; /// This example demonstrates executing a simple query against a custom datasource -#[tokio::main] -async fn main() -> Result<()> { +pub async fn custom_datasource() -> Result<()> { // create our custom datasource and adding some users let db = CustomDataSource::default(); db.populate_users(); @@ -195,6 +196,7 @@ struct CustomExec { } impl CustomExec { + #[expect(clippy::needless_pass_by_value)] fn new( projections: Option<&Vec>, schema: SchemaRef, diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs similarity index 89% rename from datafusion-examples/examples/custom_file_casts.rs rename to datafusion-examples/examples/custom_data_source/custom_file_casts.rs index 4d97ecd91dc64..895b6f52b6e1e 100644 --- a/datafusion-examples/examples/custom_file_casts.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs @@ -15,23 +15,25 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
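`custom_datasource` is converted above into a `pub async fn` that implements a `TableProvider` by hand. For contrast, a minimal sketch using the built-in `MemTable` provider (assuming `datafusion` and `tokio`) shows the same register-and-query flow without a custom implementation:

```rust
use std::sync::Arc;

use datafusion::arrow::array::{Int32Array, RecordBatch};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // Build a single in-memory partition with one column of user ids.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;
    let table = MemTable::try_new(schema, vec![vec![batch]])?;

    // Registration and querying look the same for a hand-written TableProvider.
    let ctx = SessionContext::new();
    ctx.register_table("users", Arc::new(table))?;
    ctx.sql("SELECT count(*) FROM users").await?.show().await?;
    Ok(())
}
```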
+ use std::sync::Arc; -use arrow::array::{record_batch, RecordBatch}; -use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; +use arrow::array::{RecordBatch, record_batch}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::assert_batches_eq; +use datafusion::common::Result; use datafusion::common::not_impl_err; use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion::common::{Result, ScalarValue}; use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; use datafusion::execution::context::SessionContext; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::parquet::arrow::ArrowWriter; -use datafusion::physical_expr::expressions::CastExpr; use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_expr::expressions::{CastColumnExpr, CastExpr}; use datafusion::prelude::SessionConfig; use datafusion_physical_expr_adapter::{ DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, @@ -44,9 +46,7 @@ use object_store::{ObjectStore, PutPayload}; // This example enforces that casts must be strictly widening: if the file type is Int64 and the table type is Int32, it will error // before even reading the data. // Without this custom cast rule DataFusion would happily do the narrowing cast, potentially erroring only if it found a row with data it could not cast. - -#[tokio::main] -async fn main() -> Result<()> { +pub async fn custom_file_casts() -> Result<()> { println!("=== Creating example data ==="); // Create a logical / table schema with an Int32 column @@ -192,18 +192,21 @@ impl PhysicalExprAdapter for CustomCastsPhysicalExprAdapter { ); } } + if let Some(cast) = expr.as_any().downcast_ref::() { + let input_data_type = + cast.expr().data_type(&self.physical_file_schema)?; + let output_data_type = cast.data_type(&self.physical_file_schema)?; + if !CastExpr::check_bigger_cast( + cast.target_field().data_type(), + &input_data_type, + ) { + return not_impl_err!( + "Unsupported CAST from {input_data_type} to {output_data_type}" + ); + } + } Ok(Transformed::no(expr)) }) .data() } - - fn with_partition_values( - &self, - partition_values: Vec<(FieldRef, ScalarValue)>, - ) -> Arc { - Arc::new(Self { - inner: self.inner.with_partition_values(partition_values), - ..self.clone() - }) - } } diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_data_source/custom_file_format.rs similarity index 93% rename from datafusion-examples/examples/custom_file_format.rs rename to datafusion-examples/examples/custom_data_source/custom_file_format.rs index 67fe642fd46ee..6817beec41188 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_format.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
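`custom_file_casts` above rejects narrowing file-to-table casts via `CastExpr::check_bigger_cast`. The sketch below illustrates the same strictly-widening rule with a hand-rolled check over a few `DataType` pairs; it is only an illustration of the idea, not DataFusion's implementation, which covers far more types:

```rust
use datafusion::arrow::datatypes::DataType;

/// Illustrative only: a tiny "is this cast strictly widening?" check.
fn is_widening(from: &DataType, to: &DataType) -> bool {
    use DataType::*;
    from == to
        || matches!(
            (from, to),
            (Int8, Int16 | Int32 | Int64)
                | (Int16, Int32 | Int64)
                | (Int32, Int64)
                | (Float32, Float64)
                | (Utf8, LargeUtf8)
        )
}

fn main() {
    // Int32 -> Int64 widens, so it would be allowed.
    assert!(is_widening(&DataType::Int32, &DataType::Int64));
    // Int64 -> Int32 narrows, so the adapter in this diff would reject it.
    assert!(!is_widening(&DataType::Int64, &DataType::Int32));
    println!("widening checks passed");
}
```

Failing early here, before any data is read, is what distinguishes this from relying on a runtime cast error when a narrowing value is eventually encountered.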
+ use std::{any::Any, sync::Arc}; use arrow::{ @@ -25,12 +27,13 @@ use datafusion::{ catalog::Session, common::{GetExt, Statistics}, datasource::{ + MemTable, file_format::{ - csv::CsvFormatFactory, file_compression_type::FileCompressionType, - FileFormat, FileFormatFactory, + FileFormat, FileFormatFactory, csv::CsvFormatFactory, + file_compression_type::FileCompressionType, }, physical_plan::{FileScanConfig, FileSinkConfig, FileSource}, - MemTable, + table_schema::TableSchema, }, error::Result, execution::session_state::SessionStateBuilder, @@ -47,6 +50,42 @@ use tempfile::tempdir; /// TSVFileFormatFactory is responsible for creating instances of TSVFileFormat. /// The former, once registered with the SessionState, will then be used /// to facilitate SQL operations on TSV files, such as `COPY TO` shown here. +pub async fn custom_file_format() -> Result<()> { + // Create a new context with the default configuration + let mut state = SessionStateBuilder::new().with_default_features().build(); + + // Register the custom file format + let file_format = Arc::new(TSVFileFactory::new()); + state.register_file_format(file_format, true)?; + + // Create a new context with the custom file format + let ctx = SessionContext::new_with_state(state); + + let mem_table = create_mem_table(); + ctx.register_table("mem_table", mem_table)?; + + let temp_dir = tempdir().unwrap(); + let table_save_path = temp_dir.path().join("mem_table.tsv"); + + let d = ctx + .sql(&format!( + "COPY mem_table TO '{}' STORED AS TSV;", + table_save_path.display(), + )) + .await?; + + let results = d.collect().await?; + println!( + "Number of inserted rows: {:?}", + (results[0] + .column_by_name("count") + .unwrap() + .as_primitive::() + .value(0)) + ); + + Ok(()) +} #[derive(Debug)] /// Custom file format that reads and writes TSV files @@ -128,8 +167,8 @@ impl FileFormat for TSVFileFormat { .await } - fn file_source(&self) -> Arc { - self.csv_file_format.file_source() + fn file_source(&self, table_schema: TableSchema) -> Arc { + self.csv_file_format.file_source(table_schema) } } @@ -180,44 +219,6 @@ impl GetExt for TSVFileFactory { } } -#[tokio::main] -async fn main() -> Result<()> { - // Create a new context with the default configuration - let mut state = SessionStateBuilder::new().with_default_features().build(); - - // Register the custom file format - let file_format = Arc::new(TSVFileFactory::new()); - state.register_file_format(file_format, true).unwrap(); - - // Create a new context with the custom file format - let ctx = SessionContext::new_with_state(state); - - let mem_table = create_mem_table(); - ctx.register_table("mem_table", mem_table).unwrap(); - - let temp_dir = tempdir().unwrap(); - let table_save_path = temp_dir.path().join("mem_table.tsv"); - - let d = ctx - .sql(&format!( - "COPY mem_table TO '{}' STORED AS TSV;", - table_save_path.display(), - )) - .await?; - - let results = d.collect().await?; - println!( - "Number of inserted rows: {:?}", - (results[0] - .column_by_name("count") - .unwrap() - .as_primitive::() - .value(0)) - ); - - Ok(()) -} - // create a simple mem table fn create_mem_table() -> Arc { let fields = vec![ diff --git a/datafusion-examples/examples/default_column_values.rs b/datafusion-examples/examples/custom_data_source/default_column_values.rs similarity index 63% rename from datafusion-examples/examples/default_column_values.rs rename to datafusion-examples/examples/custom_data_source/default_column_values.rs index d3a7d2ec67f3c..81d74cfbecabd 100644 --- 
a/datafusion-examples/examples/default_column_values.rs +++ b/datafusion-examples/examples/custom_data_source/default_column_values.rs @@ -15,18 +15,19 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::collections::HashMap; use std::sync::Arc; use arrow::array::RecordBatch; -use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use async_trait::async_trait; use datafusion::assert_batches_eq; use datafusion::catalog::memory::DataSourceExec; use datafusion::catalog::{Session, TableProvider}; -use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion::common::DFSchema; use datafusion::common::{Result, ScalarValue}; use datafusion::datasource::listing::PartitionedFile; @@ -37,12 +38,12 @@ use datafusion::logical_expr::utils::conjunction; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::parquet::arrow::ArrowWriter; use datafusion::parquet::file::properties::WriterProperties; -use datafusion::physical_expr::expressions::{CastExpr, Column, Literal}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_plan::ExecutionPlan; -use datafusion::prelude::{lit, SessionConfig}; +use datafusion::prelude::{SessionConfig, lit}; use datafusion_physical_expr_adapter::{ DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, + replace_columns_with_literals, }; use futures::StreamExt; use object_store::memory::InMemory; @@ -52,25 +53,22 @@ use object_store::{ObjectStore, PutPayload}; // Metadata key for storing default values in field metadata const DEFAULT_VALUE_METADATA_KEY: &str = "example.default_value"; -// Example showing how to implement custom default value handling for missing columns -// using field metadata and PhysicalExprAdapter. -// -// This example demonstrates how to: -// 1. Store default values in field metadata using a constant key -// 2. Create a custom PhysicalExprAdapter that reads these defaults -// 3. Inject default values for missing columns in filter predicates -// 4. Use the DefaultPhysicalExprAdapter as a fallback for standard schema adaptation -// 5. Wrap string default values in cast expressions for proper type conversion -// -// Important: PhysicalExprAdapter is specifically designed for rewriting filter predicates -// that get pushed down to file scans. For handling missing columns in projections, -// other mechanisms in DataFusion are used (like SchemaAdapter). -// -// The metadata-based approach provides a flexible way to store default values as strings -// and cast them to the appropriate types at query time. - -#[tokio::main] -async fn main() -> Result<()> { +/// Example showing how to implement custom default value handling for missing columns +/// using field metadata and PhysicalExprAdapter. +/// +/// This example demonstrates how to: +/// 1. Store default values in field metadata using a constant key +/// 2. Create a custom PhysicalExprAdapter that reads these defaults +/// 3. Inject default values for missing columns in filter predicates using `replace_columns_with_literals` +/// 4. Use the DefaultPhysicalExprAdapter as a fallback for standard schema adaptation +/// 5. 
Convert string default values to proper types using `ScalarValue::cast_to()` at planning time +/// +/// Important: PhysicalExprAdapter handles rewriting both filter predicates and projection +/// expressions for file scans, including handling missing columns. +/// +/// The metadata-based approach provides a flexible way to store default values as strings +/// and cast them to the appropriate types at planning time, avoiding runtime overhead. +pub async fn default_column_values() -> Result<()> { println!("=== Creating example data with missing columns and default values ==="); // Create sample data where the logical schema has more columns than the physical schema @@ -85,11 +83,10 @@ async fn main() -> Result<()> { .build(); let mut writer = - ArrowWriter::try_new(&mut buf, physical_schema.clone(), Some(props)) - .expect("creating writer"); + ArrowWriter::try_new(&mut buf, physical_schema.clone(), Some(props))?; - writer.write(&batch).expect("Writing batch"); - writer.close().unwrap(); + writer.write(&batch)?; + writer.close()?; buf }; let path = Path::from("example.parquet"); @@ -138,12 +135,14 @@ async fn main() -> Result<()> { println!("\n=== Key Insight ==="); println!("This example demonstrates how PhysicalExprAdapter works:"); println!("1. Physical schema only has 'id' and 'name' columns"); - println!("2. Logical schema has 'id', 'name', 'status', and 'priority' columns with defaults"); - println!("3. Our custom adapter intercepts filter expressions on missing columns"); - println!("4. Default values from metadata are injected as cast expressions"); + println!( + "2. Logical schema has 'id', 'name', 'status', and 'priority' columns with defaults" + ); + println!( + "3. Our custom adapter uses replace_columns_with_literals to inject default values" + ); + println!("4. Default values from metadata are cast to proper types at planning time"); println!("5. The DefaultPhysicalExprAdapter handles other schema adaptations"); - println!("\nNote: PhysicalExprAdapter is specifically for filter predicates."); - println!("For projection columns, different mechanisms handle missing columns."); Ok(()) } @@ -207,7 +206,7 @@ impl TableProvider for DefaultValueTableProvider { } fn schema(&self) -> SchemaRef { - self.schema.clone() + Arc::clone(&self.schema) } fn table_type(&self) -> TableType { @@ -228,14 +227,14 @@ impl TableProvider for DefaultValueTableProvider { filters: &[Expr], limit: Option, ) -> Result> { - let schema = self.schema.clone(); + let schema = Arc::clone(&self.schema); let df_schema = DFSchema::try_from(schema.clone())?; let filter = state.create_physical_expr( conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true)), &df_schema, )?; - let parquet_source = ParquetSource::default() + let parquet_source = ParquetSource::new(schema.clone()) .with_predicate(filter) .with_pushdown_filters(true); @@ -257,10 +256,9 @@ impl TableProvider for DefaultValueTableProvider { let file_scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("memory://")?, - self.schema.clone(), Arc::new(parquet_source), ) - .with_projection_indices(projection.cloned()) + .with_projection_indices(projection.cloned())? 
.with_limit(limit) .with_file_group(file_group) .with_expr_adapter(Some(Arc::new(DefaultValuePhysicalExprAdapterFactory) as _)); @@ -282,14 +280,15 @@ impl PhysicalExprAdapterFactory for DefaultValuePhysicalExprAdapterFactory { physical_file_schema: SchemaRef, ) -> Arc { let default_factory = DefaultPhysicalExprAdapterFactory; - let default_adapter = default_factory - .create(logical_file_schema.clone(), physical_file_schema.clone()); + let default_adapter = default_factory.create( + Arc::clone(&logical_file_schema), + Arc::clone(&physical_file_schema), + ); Arc::new(DefaultValuePhysicalExprAdapter { logical_file_schema, physical_file_schema, default_adapter, - partition_values: Vec::new(), }) } } @@ -301,98 +300,36 @@ struct DefaultValuePhysicalExprAdapter { logical_file_schema: SchemaRef, physical_file_schema: SchemaRef, default_adapter: Arc, - partition_values: Vec<(FieldRef, ScalarValue)>, } impl PhysicalExprAdapter for DefaultValuePhysicalExprAdapter { fn rewrite(&self, expr: Arc) -> Result> { - // First try our custom default value injection for missing columns - let rewritten = expr - .transform(|expr| { - self.inject_default_values( - expr, - &self.logical_file_schema, - &self.physical_file_schema, - ) - }) - .data()?; - - // Then apply the default adapter as a fallback to handle standard schema differences - // like type casting, partition column handling, etc. - let default_adapter = if !self.partition_values.is_empty() { - self.default_adapter - .with_partition_values(self.partition_values.clone()) - } else { - self.default_adapter.clone() - }; - - default_adapter.rewrite(rewritten) - } - - fn with_partition_values( - &self, - partition_values: Vec<(FieldRef, ScalarValue)>, - ) -> Arc { - Arc::new(DefaultValuePhysicalExprAdapter { - logical_file_schema: self.logical_file_schema.clone(), - physical_file_schema: self.physical_file_schema.clone(), - default_adapter: self.default_adapter.clone(), - partition_values, - }) - } -} - -impl DefaultValuePhysicalExprAdapter { - fn inject_default_values( - &self, - expr: Arc, - logical_file_schema: &Schema, - physical_file_schema: &Schema, - ) -> Result>> { - if let Some(column) = expr.as_any().downcast_ref::() { - let column_name = column.name(); - - // Check if this column exists in the physical schema - if physical_file_schema.index_of(column_name).is_err() { - // Column is missing from physical schema, check if logical schema has a default - if let Ok(logical_field) = - logical_file_schema.field_with_name(column_name) - { - if let Some(default_value_str) = - logical_field.metadata().get(DEFAULT_VALUE_METADATA_KEY) - { - // Create a string literal and wrap it in a cast expression - let default_literal = self.create_default_value_expr( - default_value_str, - logical_field.data_type(), - )?; - return Ok(Transformed::yes(default_literal)); - } - } + // Pre-compute replacements for missing columns with default values + let mut replacements = HashMap::new(); + for field in self.logical_file_schema.fields() { + // Skip columns that exist in physical schema + if self.physical_file_schema.index_of(field.name()).is_ok() { + continue; } - } - - // No transformation needed - Ok(Transformed::no(expr)) - } - fn create_default_value_expr( - &self, - value_str: &str, - data_type: &DataType, - ) -> Result> { - // Create a string literal with the default value - let string_literal = - Arc::new(Literal::new(ScalarValue::Utf8(Some(value_str.to_string())))); - - // If the target type is already Utf8, return the string literal directly - if 
matches!(data_type, DataType::Utf8) { - return Ok(string_literal); + // Check if this missing column has a default value in metadata + if let Some(default_str) = field.metadata().get(DEFAULT_VALUE_METADATA_KEY) { + // Create a Utf8 ScalarValue from the string and cast it to the target type + let string_value = ScalarValue::Utf8(Some(default_str.to_string())); + let typed_value = string_value.cast_to(field.data_type())?; + replacements.insert(field.name().as_str(), typed_value); + } } - // Otherwise, wrap the string literal in a cast expression - let cast_expr = Arc::new(CastExpr::new(string_literal, data_type.clone(), None)); + // Replace columns with their default literals if any + let rewritten = if !replacements.is_empty() { + let refs: HashMap<_, _> = replacements.iter().map(|(k, v)| (*k, v)).collect(); + replace_columns_with_literals(expr, &refs)? + } else { + expr + }; - Ok(cast_expr) + // Apply the default adapter as a fallback for other schema adaptations + self.default_adapter.rewrite(rewritten) } } diff --git a/datafusion-examples/examples/file_stream_provider.rs b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs similarity index 90% rename from datafusion-examples/examples/file_stream_provider.rs rename to datafusion-examples/examples/custom_data_source/file_stream_provider.rs index e6c59d57e98de..936da0a33d47b 100644 --- a/datafusion-examples/examples/file_stream_provider.rs +++ b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs @@ -15,6 +15,31 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + +/// Demonstrates how to use [`FileStreamProvider`] and [`StreamTable`] to stream data +/// from a file-like source (FIFO) into DataFusion for continuous querying. +/// +/// On non-Windows systems, this example creates a named pipe (FIFO) and +/// writes rows into it asynchronously while DataFusion reads the data +/// through a `FileStreamProvider`. +/// +/// This illustrates how to integrate dynamically updated data sources +/// with DataFusion without needing to reload the entire dataset each time. +/// +/// This example does not work on Windows. 
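The rewritten `default_column_values` adapter above turns a metadata default from a string into a typed literal once at planning time with `ScalarValue::cast_to`, instead of wrapping a `CastExpr` that would be evaluated per batch. A small sketch of that conversion, assuming the `datafusion` crate; the `typed_default` helper is illustrative:

```rust
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result, ScalarValue};

/// Illustrative helper: parse a string default into a literal of the column's type.
fn typed_default(default_str: &str, target: &DataType) -> Result<ScalarValue> {
    // One cast at planning time, instead of a per-batch CastExpr at execution time.
    ScalarValue::Utf8(Some(default_str.to_string())).cast_to(target)
}

fn main() -> Result<()> {
    let value = typed_default("42", &DataType::Int32)?;
    assert_eq!(value, ScalarValue::Int32(Some(42)));
    println!("default resolved to {value}");
    Ok(())
}
```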
+pub async fn file_stream_provider() -> datafusion::error::Result<()> { + #[cfg(target_os = "windows")] + { + println!("file_stream_provider example does not work on windows"); + Ok(()) + } + #[cfg(not(target_os = "windows"))] + { + non_windows::main().await + } +} + #[cfg(not(target_os = "windows"))] mod non_windows { use datafusion::assert_batches_eq; @@ -22,8 +47,8 @@ mod non_windows { use std::fs::{File, OpenOptions}; use std::io::Write; use std::path::PathBuf; - use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; use std::thread; use std::time::Duration; @@ -34,9 +59,9 @@ mod non_windows { use tempfile::TempDir; use tokio::task::JoinSet; - use datafusion::common::{exec_err, Result}; - use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; + use datafusion::common::{Result, exec_err}; use datafusion::datasource::TableProvider; + use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::logical_expr::SortExpr; use datafusion::prelude::{SessionConfig, SessionContext}; @@ -186,16 +211,3 @@ mod non_windows { Ok(()) } } - -#[tokio::main] -async fn main() -> datafusion::error::Result<()> { - #[cfg(target_os = "windows")] - { - println!("file_stream_provider example does not work on windows"); - Ok(()) - } - #[cfg(not(target_os = "windows"))] - { - non_windows::main().await - } -} diff --git a/datafusion-examples/examples/custom_data_source/main.rs b/datafusion-examples/examples/custom_data_source/main.rs new file mode 100644 index 0000000000000..b5dcf10f5cdaa --- /dev/null +++ b/datafusion-examples/examples/custom_data_source/main.rs @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # These examples are all related to extending or defining how DataFusion reads data +//! +//! These examples demonstrate how DataFusion reads data. +//! +//! ## Usage +//! ```bash +//! cargo run --example custom_data_source -- [all|csv_json_opener|csv_sql_streaming|custom_datasource|custom_file_casts|custom_file_format|default_column_values|file_stream_provider] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `csv_json_opener` — use low level FileOpener APIs to read CSV/JSON into Arrow RecordBatches +//! - `csv_sql_streaming` — build and run a streaming query plan from a SQL statement against a local CSV file +//! - `custom_datasource` — run queries against a custom datasource (TableProvider) +//! - `custom_file_casts` — implement custom casting rules to adapt file schemas +//! - `custom_file_format` — write data to a custom file format +//! 
- `default_column_values` — implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter +//! - `file_stream_provider` — run a query on FileStreamProvider which implements StreamProvider for reading and writing to arbitrary stream sources/sinks + +mod csv_json_opener; +mod csv_sql_streaming; +mod custom_datasource; +mod custom_file_casts; +mod custom_file_format; +mod default_column_values; +mod file_stream_provider; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + CsvJsonOpener, + CsvSqlStreaming, + CustomDatasource, + CustomFileCasts, + CustomFileFormat, + DefaultColumnValues, + FileStreamProvider, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "custom_data_source"; + + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::CsvJsonOpener => csv_json_opener::csv_json_opener().await?, + ExampleKind::CsvSqlStreaming => { + csv_sql_streaming::csv_sql_streaming().await? + } + ExampleKind::CustomDatasource => { + custom_datasource::custom_datasource().await? + } + ExampleKind::CustomFileCasts => { + custom_file_casts::custom_file_casts().await? + } + ExampleKind::CustomFileFormat => { + custom_file_format::custom_file_format().await? + } + ExampleKind::DefaultColumnValues => { + default_column_values::default_column_values().await? + } + ExampleKind::FileStreamProvider => { + file_stream_provider::file_stream_provider().await? + } + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/data_io/catalog.rs similarity index 98% rename from datafusion-examples/examples/catalog.rs rename to datafusion-examples/examples/data_io/catalog.rs index 229867cdfc5bb..d2ddff82e32db 100644 --- a/datafusion-examples/examples/catalog.rs +++ b/datafusion-examples/examples/data_io/catalog.rs @@ -15,15 +15,17 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! Simple example of a catalog/schema implementation. 
use async_trait::async_trait; use datafusion::{ arrow::util::pretty, catalog::{CatalogProvider, CatalogProviderList, SchemaProvider}, datasource::{ - file_format::{csv::CsvFormat, FileFormat}, - listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, TableProvider, + file_format::{FileFormat, csv::CsvFormat}, + listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, }, error::Result, execution::context::SessionState, @@ -34,8 +36,8 @@ use std::{any::Any, collections::HashMap, path::Path, sync::Arc}; use std::{fs::File, io::Write}; use tempfile::TempDir; -#[tokio::main] -async fn main() -> Result<()> { +/// Register the table into a custom catalog +pub async fn catalog() -> Result<()> { env_logger::builder() .filter_level(log::LevelFilter::Info) .init(); @@ -134,12 +136,14 @@ struct DirSchemaOpts<'a> { dir: &'a Path, format: Arc, } + /// Schema where every file with extension `ext` in a given `dir` is a table. #[derive(Debug)] struct DirSchema { ext: String, tables: RwLock>>, } + impl DirSchema { async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result> { let DirSchemaOpts { ext, dir, format } = opts; @@ -172,6 +176,7 @@ impl DirSchema { ext: ext.to_string(), })) } + #[allow(unused)] fn name(&self) -> &str { &self.ext @@ -198,6 +203,7 @@ impl SchemaProvider for DirSchema { let tables = self.tables.read().unwrap(); tables.contains_key(name) } + fn register_table( &self, name: String, @@ -223,6 +229,7 @@ impl SchemaProvider for DirSchema { struct DirCatalog { schemas: RwLock>>, } + impl DirCatalog { fn new() -> Self { Self { @@ -230,10 +237,12 @@ impl DirCatalog { } } } + impl CatalogProvider for DirCatalog { fn as_any(&self) -> &dyn Any { self } + fn register_schema( &self, name: &str, @@ -260,11 +269,13 @@ impl CatalogProvider for DirCatalog { } } } + /// Catalog lists holds multiple catalog providers. Each context has a single catalog list. #[derive(Debug)] struct CustomCatalogProviderList { catalogs: RwLock>>, } + impl CustomCatalogProviderList { fn new() -> Self { Self { @@ -272,10 +283,12 @@ impl CustomCatalogProviderList { } } } + impl CatalogProviderList for CustomCatalogProviderList { fn as_any(&self) -> &dyn Any { self } + fn register_catalog( &self, name: String, diff --git a/datafusion-examples/examples/json_shredding.rs b/datafusion-examples/examples/data_io/json_shredding.rs similarity index 76% rename from datafusion-examples/examples/json_shredding.rs rename to datafusion-examples/examples/data_io/json_shredding.rs index 5ef8b59b64200..d2ffacc9464c2 100644 --- a/datafusion-examples/examples/json_shredding.rs +++ b/datafusion-examples/examples/data_io/json_shredding.rs @@ -15,17 +15,19 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::any::Any; use std::sync::Arc; use arrow::array::{RecordBatch, StringArray}; -use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::assert_batches_eq; use datafusion::common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; -use datafusion::common::{assert_contains, exec_datafusion_err, Result}; +use datafusion::common::{Result, assert_contains, exec_datafusion_err}; use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; @@ -37,7 +39,7 @@ use datafusion::logical_expr::{ use datafusion::parquet::arrow::ArrowWriter; use datafusion::parquet::file::properties::WriterProperties; use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_expr::{expressions, ScalarFunctionExpr}; +use datafusion::physical_expr::{ScalarFunctionExpr, expressions}; use datafusion::prelude::SessionConfig; use datafusion::scalar::ScalarValue; use datafusion_physical_expr_adapter::{ @@ -63,8 +65,7 @@ use object_store::{ObjectStore, PutPayload}; // 1. Push down predicates for better filtering // 2. Avoid expensive JSON parsing at query time // 3. Leverage columnar storage benefits for the materialized fields -#[tokio::main] -async fn main() -> Result<()> { +pub async fn json_shredding() -> Result<()> { println!("=== Creating example data with flat columns and underscore prefixes ==="); // Create sample data with flat columns using underscore prefixes @@ -232,7 +233,7 @@ impl ScalarUDFImpl for JsonGetStr { _ => { return Err(exec_datafusion_err!( "json_get_str first argument must be a string" - )) + )); } }; // We expect a string array that contains JSON strings @@ -248,7 +249,7 @@ impl ScalarUDFImpl for JsonGetStr { _ => { return Err(exec_datafusion_err!( "json_get_str second argument must be a string array" - )) + )); } }; let values = json_array @@ -276,14 +277,14 @@ impl PhysicalExprAdapterFactory for ShreddedJsonRewriterFactory { physical_file_schema: SchemaRef, ) -> Arc { let default_factory = DefaultPhysicalExprAdapterFactory; - let default_adapter = default_factory - .create(logical_file_schema.clone(), physical_file_schema.clone()); + let default_adapter = default_factory.create( + Arc::clone(&logical_file_schema), + Arc::clone(&physical_file_schema), + ); Arc::new(ShreddedJsonRewriter { - logical_file_schema, physical_file_schema, default_adapter, - partition_values: Vec::new(), }) } } @@ -292,10 +293,8 @@ impl PhysicalExprAdapterFactory for ShreddedJsonRewriterFactory { /// and wraps DefaultPhysicalExprAdapter for standard schema adaptation #[derive(Debug)] struct ShreddedJsonRewriter { - logical_file_schema: SchemaRef, physical_file_schema: SchemaRef, default_adapter: Arc, - partition_values: Vec<(FieldRef, ScalarValue)>, } impl PhysicalExprAdapter for ShreddedJsonRewriter { @@ -306,27 +305,8 @@ impl PhysicalExprAdapter for ShreddedJsonRewriter { .data()?; // Then apply the default adapter as a fallback to handle standard schema differences - // like type casting, missing columns, and partition column handling - let default_adapter = if !self.partition_values.is_empty() { - self.default_adapter - .with_partition_values(self.partition_values.clone()) - } else { - self.default_adapter.clone() - }; - - default_adapter.rewrite(rewritten) - } - - fn with_partition_values( - &self, - partition_values: Vec<(FieldRef, ScalarValue)>, - ) -> Arc { - Arc::new(ShreddedJsonRewriter { - logical_file_schema: 
self.logical_file_schema.clone(), - physical_file_schema: self.physical_file_schema.clone(), - default_adapter: self.default_adapter.clone(), - partition_values, - }) + // like type casting and missing columns + self.default_adapter.rewrite(rewritten) } } @@ -336,44 +316,43 @@ impl ShreddedJsonRewriter { expr: Arc, physical_file_schema: &Schema, ) -> Result>> { - if let Some(func) = expr.as_any().downcast_ref::() { - if func.name() == "json_get_str" && func.args().len() == 2 { - // Get the key from the first argument - if let Some(literal) = func.args()[0] + if let Some(func) = expr.as_any().downcast_ref::() + && func.name() == "json_get_str" + && func.args().len() == 2 + { + // Get the key from the first argument + if let Some(literal) = func.args()[0] + .as_any() + .downcast_ref::() + && let ScalarValue::Utf8(Some(field_name)) = literal.value() + { + // Get the column from the second argument + if let Some(column) = func.args()[1] .as_any() - .downcast_ref::() + .downcast_ref::() { - if let ScalarValue::Utf8(Some(field_name)) = literal.value() { - // Get the column from the second argument - if let Some(column) = func.args()[1] - .as_any() - .downcast_ref::() - { - let column_name = column.name(); - // Check if there's a flat column with underscore prefix - let flat_column_name = format!("_{column_name}.{field_name}"); - - if let Ok(flat_field_index) = - physical_file_schema.index_of(&flat_column_name) - { - let flat_field = - physical_file_schema.field(flat_field_index); - - if flat_field.data_type() == &DataType::Utf8 { - // Replace the whole expression with a direct column reference - let new_expr = Arc::new(expressions::Column::new( - &flat_column_name, - flat_field_index, - )) - as Arc; - - return Ok(Transformed { - data: new_expr, - tnr: TreeNodeRecursion::Stop, - transformed: true, - }); - } - } + let column_name = column.name(); + // Check if there's a flat column with underscore prefix + let flat_column_name = format!("_{column_name}.{field_name}"); + + if let Ok(flat_field_index) = + physical_file_schema.index_of(&flat_column_name) + { + let flat_field = physical_file_schema.field(flat_field_index); + + if flat_field.data_type() == &DataType::Utf8 { + // Replace the whole expression with a direct column reference + let new_expr = Arc::new(expressions::Column::new( + &flat_column_name, + flat_field_index, + )) + as Arc; + + return Ok(Transformed { + data: new_expr, + tnr: TreeNodeRecursion::Stop, + transformed: true, + }); } } } diff --git a/datafusion-examples/examples/data_io/main.rs b/datafusion-examples/examples/data_io/main.rs new file mode 100644 index 0000000000000..496a588d4087a --- /dev/null +++ b/datafusion-examples/examples/data_io/main.rs @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
# Examples of data formats and I/O +//! +//! These examples demonstrate working with different data formats and I/O sources. +//! +//! ## Usage +//! ```bash +//! cargo run --example data_io -- [all|catalog|json_shredding|parquet_adv_idx|parquet_emb_idx|parquet_enc_with_kms|parquet_enc|parquet_exec_visitor|parquet_idx|query_http_csv|remote_catalog] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `catalog` — register the table into a custom catalog +//! - `json_shredding` — implement custom filter rewriting for JSON shredding +//! - `parquet_adv_idx` — create a detailed secondary index that covers the contents of several parquet files +//! - `parquet_emb_idx` — store a custom index inside a Parquet file and use it to speed up queries +//! - `parquet_enc_with_kms` — read and write encrypted Parquet files using an encryption factory +//! - `parquet_enc` — read and write encrypted Parquet files using DataFusion +//! - `parquet_exec_visitor` — extract statistics by visiting an ExecutionPlan after execution +//! - `parquet_idx` — create a secondary index over several parquet files and use it to speed up queries +//! - `query_http_csv` — configure `object_store` and run a query against files via HTTP +//! - `remote_catalog` — interface with a remote catalog (e.g. over a network) + +mod catalog; +mod json_shredding; +mod parquet_advanced_index; +mod parquet_embedded_index; +mod parquet_encrypted; +mod parquet_encrypted_with_kms; +mod parquet_exec_visitor; +mod parquet_index; +mod query_http_csv; +mod remote_catalog; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + Catalog, + JsonShredding, + ParquetAdvIdx, + ParquetEmbIdx, + ParquetEnc, + ParquetEncWithKms, + ParquetExecVisitor, + ParquetIdx, + QueryHttpCsv, + RemoteCatalog, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "data_io"; + + fn runnable() -> impl Iterator<Item = Self> { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::Catalog => catalog::catalog().await?, + ExampleKind::JsonShredding => json_shredding::json_shredding().await?, + ExampleKind::ParquetAdvIdx => { + parquet_advanced_index::parquet_advanced_index().await? + } + ExampleKind::ParquetEmbIdx => { + parquet_embedded_index::parquet_embedded_index().await? + } + ExampleKind::ParquetEncWithKms => { + parquet_encrypted_with_kms::parquet_encrypted_with_kms().await? + } + ExampleKind::ParquetEnc => parquet_encrypted::parquet_encrypted().await?, + ExampleKind::ParquetExecVisitor => { + parquet_exec_visitor::parquet_exec_visitor().await?
+ } + ExampleKind::ParquetIdx => parquet_index::parquet_index().await?, + ExampleKind::QueryHttpCsv => query_http_csv::query_http_csv().await?, + ExampleKind::RemoteCatalog => remote_catalog::remote_catalog().await?, + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/data_io/parquet_advanced_index.rs similarity index 98% rename from datafusion-examples/examples/advanced_parquet_index.rs rename to datafusion-examples/examples/data_io/parquet_advanced_index.rs index 371c18de354ce..3f4ebe7a92055 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/data_io/parquet_advanced_index.rs @@ -15,40 +15,42 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::collections::{HashMap, HashSet}; use std::fs::File; use std::ops::Range; use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; use datafusion::catalog::Session; use datafusion::common::{ - internal_datafusion_err, DFSchema, DataFusionError, Result, ScalarValue, + DFSchema, DataFusionError, Result, ScalarValue, internal_datafusion_err, }; +use datafusion::datasource::TableProvider; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::parquet::ParquetAccessPlan; use datafusion::datasource::physical_plan::{ FileScanConfigBuilder, ParquetFileReaderFactory, ParquetSource, }; -use datafusion::datasource::TableProvider; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::logical_expr::utils::conjunction; use datafusion::logical_expr::{TableProviderFilterPushDown, TableType}; +use datafusion::parquet::arrow::ArrowWriter; use datafusion::parquet::arrow::arrow_reader::{ ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelection, RowSelector, }; use datafusion::parquet::arrow::async_reader::{AsyncFileReader, ParquetObjectReader}; -use datafusion::parquet::arrow::ArrowWriter; use datafusion::parquet::file::metadata::ParquetMetaData; use datafusion::parquet::file::properties::{EnabledStatistics, WriterProperties}; use datafusion::parquet::schema::types::ColumnPath; -use datafusion::physical_expr::utils::{Guarantee, LiteralGuarantee}; use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_expr::utils::{Guarantee, LiteralGuarantee}; use datafusion::physical_optimizer::pruning::PruningPredicate; -use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::prelude::*; use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; @@ -56,8 +58,8 @@ use arrow::datatypes::SchemaRef; use async_trait::async_trait; use bytes::Bytes; use datafusion::datasource::memory::DataSourceExec; -use futures::future::BoxFuture; use futures::FutureExt; +use futures::future::BoxFuture; use object_store::ObjectStore; 
use tempfile::TempDir; use url::Url; @@ -155,8 +157,7 @@ use url::Url; /// /// [`ListingTable`]: datafusion::datasource::listing::ListingTable /// [Page Index](https://github.com/apache/parquet-format/blob/master/PageIndex.md) -#[tokio::main] -async fn main() -> Result<()> { +pub async fn parquet_advanced_index() -> Result<()> { // the object store is used to read the parquet files (in this case, it is // a local file system, but in a real system it could be S3, GCS, etc) let object_store: Arc = @@ -239,6 +240,7 @@ pub struct IndexTableProvider { /// if true, use row selections in addition to row group selections use_row_selections: AtomicBool, } + impl IndexTableProvider { /// Create a new IndexTableProvider /// * `object_store` - the object store implementation to use for reading files @@ -491,19 +493,18 @@ impl TableProvider for IndexTableProvider { .with_file(indexed_file); let file_source = Arc::new( - ParquetSource::default() + ParquetSource::new(schema.clone()) // provide the predicate so the DataSourceExec can try and prune // row groups internally .with_predicate(predicate) // provide the factory to create parquet reader without re-reading metadata .with_parquet_file_reader_factory(Arc::new(reader_factory)), ); - let file_scan_config = - FileScanConfigBuilder::new(object_store_url, schema, file_source) - .with_limit(limit) - .with_projection_indices(projection.cloned()) - .with_file(partitioned_file) - .build(); + let file_scan_config = FileScanConfigBuilder::new(object_store_url, file_source) + .with_limit(limit) + .with_projection_indices(projection.cloned())? + .with_file(partitioned_file) + .build(); // Finally, put it all together into a DataSourceExec Ok(DataSourceExec::from_data_source(file_scan_config)) @@ -540,6 +541,7 @@ impl CachedParquetFileReaderFactory { metadata: HashMap::new(), } } + /// Add the pre-parsed information about the file to the factor fn with_file(mut self, indexed_file: &IndexedFile) -> Self { self.metadata.insert( diff --git a/datafusion-examples/examples/parquet_embedded_index.rs b/datafusion-examples/examples/data_io/parquet_embedded_index.rs similarity index 95% rename from datafusion-examples/examples/parquet_embedded_index.rs rename to datafusion-examples/examples/data_io/parquet_embedded_index.rs index 3cbe189147752..bcaca2ed5c85b 100644 --- a/datafusion-examples/examples/parquet_embedded_index.rs +++ b/datafusion-examples/examples/data_io/parquet_embedded_index.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! Embedding and using a custom index in Parquet files //! //! 
# Background @@ -116,11 +118,11 @@ use arrow::record_batch::RecordBatch; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use async_trait::async_trait; use datafusion::catalog::{Session, TableProvider}; -use datafusion::common::{exec_err, HashMap, HashSet, Result}; +use datafusion::common::{HashMap, HashSet, Result, exec_err}; +use datafusion::datasource::TableType; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::memory::DataSourceExec; use datafusion::datasource::physical_plan::{FileScanConfigBuilder, ParquetSource}; -use datafusion::datasource::TableType; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::logical_expr::{Operator, TableProviderFilterPushDown}; use datafusion::parquet::arrow::ArrowWriter; @@ -130,12 +132,37 @@ use datafusion::parquet::file::reader::{FileReader, SerializedFileReader}; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; use datafusion::scalar::ScalarValue; -use std::fs::{read_dir, File}; +use std::fs::{File, read_dir}; use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; use tempfile::TempDir; +/// Store a custom index inside a Parquet file and use it to speed up queries +pub async fn parquet_embedded_index() -> Result<()> { + // 1. Create temp dir and write 3 Parquet files with different category sets + let tmp = TempDir::new()?; + let dir = tmp.path(); + write_file_with_index(&dir.join("a.parquet"), &["foo", "bar", "foo"])?; + write_file_with_index(&dir.join("b.parquet"), &["baz", "qux"])?; + write_file_with_index(&dir.join("c.parquet"), &["foo", "quux", "quux"])?; + + // 2. Register our custom TableProvider + let field = Field::new("category", DataType::Utf8, false); + let schema_ref = Arc::new(Schema::new(vec![field])); + let provider = Arc::new(DistinctIndexTable::try_new(dir, schema_ref.clone())?); + + let ctx = SessionContext::new(); + ctx.register_table("t", provider)?; + + // 3. Run a query: only files containing 'foo' get scanned. The rest are pruned. + // based on the distinct index. + let df = ctx.sql("SELECT * FROM t WHERE category = 'foo'").await?; + df.show().await?; + + Ok(()) +} + /// An index of distinct values for a single column /// /// In this example the index is a simple set of strings, but in a real @@ -392,21 +419,15 @@ impl TableProvider for DistinctIndexTable { // equality analysis or write your own custom logic. 
let mut target: Option<&str> = None; - if filters.len() == 1 { - if let Expr::BinaryExpr(expr) = &filters[0] { - if expr.op == Operator::Eq { - if let ( - Expr::Column(c), - Expr::Literal(ScalarValue::Utf8(Some(v)), _), - ) = (&*expr.left, &*expr.right) - { - if c.name == "category" { - println!("Filtering for category: {v}"); - target = Some(v); - } - } - } - } + if filters.len() == 1 + && let Expr::BinaryExpr(expr) = &filters[0] + && expr.op == Operator::Eq + && let (Expr::Column(c), Expr::Literal(ScalarValue::Utf8(Some(v)), _)) = + (&*expr.left, &*expr.right) + && c.name == "category" + { + println!("Filtering for category: {v}"); + target = Some(v); } // Determine which files to scan let files_to_scan: Vec<_> = self @@ -426,8 +447,10 @@ impl TableProvider for DistinctIndexTable { // Build ParquetSource to actually read the files let url = ObjectStoreUrl::parse("file://")?; - let source = Arc::new(ParquetSource::default().with_enable_page_index(true)); - let mut builder = FileScanConfigBuilder::new(url, self.schema.clone(), source); + let source = Arc::new( + ParquetSource::new(self.schema.clone()).with_enable_page_index(true), + ); + let mut builder = FileScanConfigBuilder::new(url, source); for file in files_to_scan { let path = self.dir.join(file); let len = std::fs::metadata(&path)?.len(); @@ -450,28 +473,3 @@ impl TableProvider for DistinctIndexTable { Ok(vec![TableProviderFilterPushDown::Inexact; fs.len()]) } } - -#[tokio::main] -async fn main() -> Result<()> { - // 1. Create temp dir and write 3 Parquet files with different category sets - let tmp = TempDir::new()?; - let dir = tmp.path(); - write_file_with_index(&dir.join("a.parquet"), &["foo", "bar", "foo"])?; - write_file_with_index(&dir.join("b.parquet"), &["baz", "qux"])?; - write_file_with_index(&dir.join("c.parquet"), &["foo", "quux", "quux"])?; - - // 2. Register our custom TableProvider - let field = Field::new("category", DataType::Utf8, false); - let schema_ref = Arc::new(Schema::new(vec![field])); - let provider = Arc::new(DistinctIndexTable::try_new(dir, schema_ref.clone())?); - - let ctx = SessionContext::new(); - ctx.register_table("t", provider)?; - - // 3. Run a query: only files containing 'foo' get scanned. The rest are pruned. - // based on the distinct index. - let df = ctx.sql("SELECT * FROM t WHERE category = 'foo'").await?; - df.show().await?; - - Ok(()) -} diff --git a/datafusion-examples/examples/parquet_encrypted.rs b/datafusion-examples/examples/data_io/parquet_encrypted.rs similarity index 94% rename from datafusion-examples/examples/parquet_encrypted.rs rename to datafusion-examples/examples/data_io/parquet_encrypted.rs index 690d9f2a5f140..f88ab91321e91 100644 --- a/datafusion-examples/examples/parquet_encrypted.rs +++ b/datafusion-examples/examples/data_io/parquet_encrypted.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use datafusion::common::DataFusionError; use datafusion::config::{ConfigFileEncryptionProperties, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; @@ -25,8 +27,8 @@ use datafusion::prelude::{ParquetReadOptions, SessionContext}; use std::sync::Arc; use tempfile::TempDir; -#[tokio::main] -async fn main() -> datafusion::common::Result<()> { +/// Read and write encrypted Parquet files using DataFusion +pub async fn parquet_encrypted() -> datafusion::common::Result<()> { // The SessionContext is the main high level API for interacting with DataFusion let ctx = SessionContext::new(); @@ -73,7 +75,9 @@ async fn main() -> datafusion::common::Result<()> { let encrypted_parquet_df = ctx.read_parquet(tempfile_str, read_options).await?; // Show information from the dataframe - println!("\n\n==============================================================================="); + println!( + "\n\n===============================================================================" + ); println!("Encrypted Parquet DataFrame:"); query_dataframe(&encrypted_parquet_df).await?; diff --git a/datafusion-examples/examples/parquet_encrypted_with_kms.rs b/datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs similarity index 99% rename from datafusion-examples/examples/parquet_encrypted_with_kms.rs rename to datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs index 45bfd183773a0..1a9bf56c09b35 100644 --- a/datafusion-examples/examples/parquet_encrypted_with_kms.rs +++ b/datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow_schema::SchemaRef; use async_trait::async_trait; @@ -53,8 +55,7 @@ const ENCRYPTION_FACTORY_ID: &str = "example.mock_kms_encryption"; /// which is not a secure way to store encryption keys. /// For production use, it is recommended to use a key-management service (KMS) to encrypt /// data encryption keys. -#[tokio::main] -async fn main() -> Result<()> { +pub async fn parquet_encrypted_with_kms() -> Result<()> { let ctx = SessionContext::new(); // Register an `EncryptionFactory` implementation to be used for Parquet encryption diff --git a/datafusion-examples/examples/parquet_exec_visitor.rs b/datafusion-examples/examples/data_io/parquet_exec_visitor.rs similarity index 83% rename from datafusion-examples/examples/parquet_exec_visitor.rs rename to datafusion-examples/examples/data_io/parquet_exec_visitor.rs index 84f92d4f450e1..d38fe9e171205 100644 --- a/datafusion-examples/examples/parquet_exec_visitor.rs +++ b/datafusion-examples/examples/data_io/parquet_exec_visitor.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::sync::Arc; use datafusion::datasource::file_format::parquet::ParquetFormat; @@ -25,13 +27,12 @@ use datafusion::error::DataFusionError; use datafusion::execution::context::SessionContext; use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::{ - execute_stream, visit_execution_plan, ExecutionPlan, ExecutionPlanVisitor, + ExecutionPlan, ExecutionPlanVisitor, execute_stream, visit_execution_plan, }; use futures::StreamExt; /// Example of collecting metrics after execution by visiting the `ExecutionPlan` -#[tokio::main] -async fn main() { +pub async fn parquet_exec_visitor() -> datafusion::common::Result<()> { let ctx = SessionContext::new(); let test_data = datafusion::test_util::parquet_test_data(); @@ -51,8 +52,8 @@ async fn main() { ) .await; - let df = ctx.sql("SELECT * FROM my_table").await.unwrap(); - let plan = df.create_physical_plan().await.unwrap(); + let df = ctx.sql("SELECT * FROM my_table").await?; + let plan = df.create_physical_plan().await?; // Create empty visitor let mut visitor = ParquetExecVisitor { @@ -63,12 +64,12 @@ async fn main() { // Make sure you execute the plan to collect actual execution statistics. // For example, in this example the `file_scan_config` is known without executing // but the `bytes_scanned` would be None if we did not execute. - let mut batch_stream = execute_stream(plan.clone(), ctx.task_ctx()).unwrap(); + let mut batch_stream = execute_stream(plan.clone(), ctx.task_ctx())?; while let Some(batch) = batch_stream.next().await { println!("Batch rows: {}", batch.unwrap().num_rows()); } - visit_execution_plan(plan.as_ref(), &mut visitor).unwrap(); + visit_execution_plan(plan.as_ref(), &mut visitor)?; println!( "ParquetExecVisitor bytes_scanned: {:?}", @@ -78,6 +79,8 @@ async fn main() { "ParquetExecVisitor file_groups: {:?}", visitor.file_groups.unwrap() ); + + Ok(()) } /// Define a struct with fields to hold the execution information you want to @@ -97,18 +100,17 @@ impl ExecutionPlanVisitor for ParquetExecVisitor { /// or `post_visit` (visit each node after its children/inputs) fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { // If needed match on a specific `ExecutionPlan` node type - if let Some(data_source_exec) = plan.as_any().downcast_ref::() { - if let Some((file_config, _)) = + if let Some(data_source_exec) = plan.as_any().downcast_ref::() + && let Some((file_config, _)) = data_source_exec.downcast_to_file_source::() - { - self.file_groups = Some(file_config.file_groups.clone()); - - let metrics = match data_source_exec.metrics() { - None => return Ok(true), - Some(metrics) => metrics, - }; - self.bytes_scanned = metrics.sum_by_name("bytes_scanned"); - } + { + self.file_groups = Some(file_config.file_groups.clone()); + + let metrics = match data_source_exec.metrics() { + None => return Ok(true), + Some(metrics) => metrics, + }; + self.bytes_scanned = metrics.sum_by_name("bytes_scanned"); } Ok(true) } diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/data_io/parquet_index.rs similarity index 97% rename from datafusion-examples/examples/parquet_index.rs rename to datafusion-examples/examples/data_io/parquet_index.rs index a1dd1f1ffd10d..e11a303f442a4 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/data_io/parquet_index.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use arrow::array::{ Array, ArrayRef, AsArray, BooleanArray, Int32Array, RecordBatch, StringArray, UInt64Array, @@ -25,19 +27,19 @@ use async_trait::async_trait; use datafusion::catalog::Session; use datafusion::common::pruning::PruningStatistics; use datafusion::common::{ - internal_datafusion_err, DFSchema, DataFusionError, Result, ScalarValue, + DFSchema, DataFusionError, Result, ScalarValue, internal_datafusion_err, }; +use datafusion::datasource::TableProvider; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::memory::DataSourceExec; use datafusion::datasource::physical_plan::{FileScanConfigBuilder, ParquetSource}; -use datafusion::datasource::TableProvider; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::logical_expr::{ - utils::conjunction, TableProviderFilterPushDown, TableType, + TableProviderFilterPushDown, TableType, utils::conjunction, }; use datafusion::parquet::arrow::arrow_reader::statistics::StatisticsConverter; use datafusion::parquet::arrow::{ - arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter, + ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder, }; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_optimizer::pruning::PruningPredicate; @@ -50,8 +52,8 @@ use std::fs; use std::fs::{DirEntry, File}; use std::ops::Range; use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; use tempfile::TempDir; use url::Url; @@ -102,8 +104,7 @@ use url::Url; /// ``` /// /// [`ListingTable`]: datafusion::datasource::listing::ListingTable -#[tokio::main] -async fn main() -> Result<()> { +pub async fn parquet_index() -> Result<()> { // Demo data has three files, each with schema // * file_name (string) // * value (int32) @@ -242,10 +243,11 @@ impl TableProvider for IndexTableProvider { let files = self.index.get_files(predicate.clone())?; let object_store_url = ObjectStoreUrl::parse("file://")?; - let source = Arc::new(ParquetSource::default().with_predicate(predicate)); + let source = + Arc::new(ParquetSource::new(self.schema()).with_predicate(predicate)); let mut file_scan_config_builder = - FileScanConfigBuilder::new(object_store_url, self.schema(), source) - .with_projection_indices(projection.cloned()) + FileScanConfigBuilder::new(object_store_url, source) + .with_projection_indices(projection.cloned())? .with_limit(limit); // Transform to the format needed to pass to DataSourceExec @@ -509,7 +511,7 @@ impl ParquetMetadataIndexBuilder { // Get the schema of the file. A real system might have to handle the // case where the schema of the file is not the same as the schema of - // the other files e.g. using SchemaAdapter. + // the other files e.g. using PhysicalExprAdapterFactory. if self.file_schema.is_none() { self.file_schema = Some(reader.schema().clone()); } diff --git a/datafusion-examples/examples/query-http-csv.rs b/datafusion-examples/examples/data_io/query_http_csv.rs similarity index 91% rename from datafusion-examples/examples/query-http-csv.rs rename to datafusion-examples/examples/data_io/query_http_csv.rs index fa3fd2ac068df..71421e6270ccb 100644 --- a/datafusion-examples/examples/query-http-csv.rs +++ b/datafusion-examples/examples/data_io/query_http_csv.rs @@ -15,16 +15,16 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use datafusion::error::Result; use datafusion::prelude::*; use object_store::http::HttpBuilder; use std::sync::Arc; use url::Url; -/// This example demonstrates executing a simple query against an Arrow data source (CSV) and -/// fetching results -#[tokio::main] -async fn main() -> Result<()> { +/// Configure `object_store` and run a query against files via HTTP +pub async fn query_http_csv() -> Result<()> { // create local execution context let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/remote_catalog.rs b/datafusion-examples/examples/data_io/remote_catalog.rs similarity index 98% rename from datafusion-examples/examples/remote_catalog.rs rename to datafusion-examples/examples/data_io/remote_catalog.rs index 74575554ec0af..10ec26b1d5c05 100644 --- a/datafusion-examples/examples/remote_catalog.rs +++ b/datafusion-examples/examples/data_io/remote_catalog.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! /// This example shows how to implement the DataFusion [`CatalogProvider`] API /// for catalogs that are remote (require network access) and/or offer only /// asynchronous APIs such as [Polaris], [Unity], and [Hive]. @@ -39,15 +41,15 @@ use datafusion::common::{assert_batches_eq, internal_datafusion_err, plan_err}; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::execution::SendableRecordBatchStream; use datafusion::logical_expr::{Expr, TableType}; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::prelude::{DataFrame, SessionContext}; use futures::TryStreamExt; use std::any::Any; use std::sync::Arc; -#[tokio::main] -async fn main() -> Result<()> { +/// Interfacing with a remote catalog (e.g. over a network) +pub async fn remote_catalog() -> Result<()> { // As always, we create a session context to interact with DataFusion let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/dataframe/cache_factory.rs b/datafusion-examples/examples/dataframe/cache_factory.rs new file mode 100644 index 0000000000000..a6c465720c626 --- /dev/null +++ b/datafusion-examples/examples/dataframe/cache_factory.rs @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! See `main.rs` for how to run it. 
+ +use std::fmt::Debug; +use std::hash::Hash; +use std::sync::Arc; +use std::sync::RwLock; + +use arrow::array::RecordBatch; +use async_trait::async_trait; +use datafusion::catalog::memory::MemorySourceConfig; +use datafusion::common::DFSchemaRef; +use datafusion::error::Result; +use datafusion::execution::SessionState; +use datafusion::execution::SessionStateBuilder; +use datafusion::execution::context::QueryPlanner; +use datafusion::execution::session_state::CacheFactory; +use datafusion::logical_expr::Extension; +use datafusion::logical_expr::LogicalPlan; +use datafusion::logical_expr::UserDefinedLogicalNode; +use datafusion::logical_expr::UserDefinedLogicalNodeCore; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::collect_partitioned; +use datafusion::physical_planner::DefaultPhysicalPlanner; +use datafusion::physical_planner::ExtensionPlanner; +use datafusion::physical_planner::PhysicalPlanner; +use datafusion::prelude::ParquetReadOptions; +use datafusion::prelude::SessionContext; +use datafusion::prelude::*; +use datafusion_common::HashMap; + +/// This example demonstrates how to leverage [CacheFactory] to implement custom caching strategies for dataframes in DataFusion. +/// By default, [DataFrame::cache] in Datafusion is eager and creates an in-memory table. This example shows a basic alternative implementation for lazy caching. +/// Specifically, it implements: +/// - A [CustomCacheFactory] that creates a logical node [CacheNode] representing the cache operation. +/// - A [CacheNodePlanner] (an [ExtensionPlanner]) that understands [CacheNode] and performs caching. +/// - A [CacheNodeQueryPlanner] that installs [CacheNodePlanner]. +/// - A simple in-memory [CacheManager] that stores cached [RecordBatch]es. Note that the implementation for this example is very naive and only implements put, but for real production use cases cache eviction and drop should also be implemented. +pub async fn cache_dataframe_with_custom_logic() -> Result<()> { + let testdata = datafusion::test_util::parquet_test_data(); + let filename = &format!("{testdata}/alltypes_plain.parquet"); + + let session_state = SessionStateBuilder::new() + .with_cache_factory(Some(Arc::new(CustomCacheFactory {}))) + .with_query_planner(Arc::new(CacheNodeQueryPlanner::default())) + .build(); + let ctx = SessionContext::new_with_state(session_state); + + // Read the parquet files and show its schema using 'describe' + let parquet_df = ctx + .read_parquet(filename, ParquetReadOptions::default()) + .await?; + + let df_cached = parquet_df + .select_columns(&["id", "bool_col", "timestamp_col"])? + .filter(col("id").gt(lit(1)))? 
+ .cache() + .await?; + + let df1 = df_cached.clone().filter(col("bool_col").is_true())?; + let df2 = df1.clone().sort(vec![col("id").sort(true, false)])?; + + // should see log for caching only once + df_cached.show().await?; + df1.show().await?; + df2.show().await?; + + Ok(()) +} + +#[derive(Debug)] +struct CustomCacheFactory {} + +impl CacheFactory for CustomCacheFactory { + fn create( + &self, + plan: LogicalPlan, + _session_state: &SessionState, + ) -> Result { + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(CacheNode { input: plan }), + })) + } +} + +#[derive(PartialEq, Eq, PartialOrd, Hash, Debug)] +struct CacheNode { + input: LogicalPlan, +} + +impl UserDefinedLogicalNodeCore for CacheNode { + fn name(&self) -> &str { + "CacheNode" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + self.input.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "CacheNode") + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + mut inputs: Vec, + ) -> Result { + assert_eq!(inputs.len(), 1, "input size must be one"); + Ok(Self { + input: inputs.swap_remove(0), + }) + } +} + +struct CacheNodePlanner { + cache_manager: Arc>, +} + +#[async_trait] +impl ExtensionPlanner for CacheNodePlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + session_state: &SessionState, + ) -> Result>> { + if let Some(cache_node) = node.as_any().downcast_ref::() { + assert_eq!(logical_inputs.len(), 1, "Inconsistent number of inputs"); + assert_eq!(physical_inputs.len(), 1, "Inconsistent number of inputs"); + if self + .cache_manager + .read() + .unwrap() + .get(&cache_node.input) + .is_none() + { + let ctx = session_state.task_ctx(); + println!("caching in memory"); + let batches = + collect_partitioned(physical_inputs[0].clone(), ctx).await?; + self.cache_manager + .write() + .unwrap() + .put(cache_node.input.clone(), batches); + } else { + println!("fetching directly from cache manager"); + } + Ok(self + .cache_manager + .read() + .unwrap() + .get(&cache_node.input) + .map(|batches| { + let exec: Arc = MemorySourceConfig::try_new_exec( + batches, + physical_inputs[0].schema(), + None, + ) + .unwrap(); + exec + })) + } else { + Ok(None) + } + } +} + +#[derive(Debug, Default)] +struct CacheNodeQueryPlanner { + cache_manager: Arc>, +} + +#[async_trait] +impl QueryPlanner for CacheNodeQueryPlanner { + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + session_state: &SessionState, + ) -> Result> { + let physical_planner = + DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new( + CacheNodePlanner { + cache_manager: Arc::clone(&self.cache_manager), + }, + )]); + physical_planner + .create_physical_plan(logical_plan, session_state) + .await + } +} + +// This naive implementation only includes put, but for real production use cases cache eviction and drop should also be implemented. 
+#[derive(Debug, Default)] +struct CacheManager { + cache: HashMap>>, +} + +impl CacheManager { + pub fn put(&mut self, k: LogicalPlan, v: Vec>) { + self.cache.insert(k, v); + } + + pub fn get(&self, k: &LogicalPlan) -> Option<&Vec>> { + self.cache.get(k) + } +} diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe/dataframe.rs similarity index 90% rename from datafusion-examples/examples/dataframe.rs rename to datafusion-examples/examples/dataframe/dataframe.rs index a5ee571a14764..94653e80c8695 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe/dataframe.rs @@ -15,22 +15,23 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray, StringViewArray}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::catalog::MemTable; +use datafusion::common::ScalarValue; use datafusion::common::config::CsvOptions; use datafusion::common::parsers::CompressionTypeVariant; -use datafusion::common::DataFusionError; -use datafusion::common::ScalarValue; use datafusion::dataframe::DataFrameWriteOptions; use datafusion::error::Result; use datafusion::functions_aggregate::average::avg; use datafusion::functions_aggregate::min_max::max; use datafusion::prelude::*; -use std::fs::File; +use std::fs::{File, create_dir_all}; use std::io::Write; use std::sync::Arc; -use tempfile::tempdir; +use tempfile::{TempDir, tempdir}; /// This example demonstrates using DataFusion's DataFrame API /// @@ -39,6 +40,7 @@ use tempfile::tempdir; /// * [read_parquet]: execute queries against parquet files /// * [read_csv]: execute queries against csv files /// * [read_memory]: execute queries against in-memory arrow data +/// * [read_memory_macro]: execute queries against in-memory arrow data using macro /// /// # Writing out to local storage /// @@ -53,12 +55,7 @@ use tempfile::tempdir; /// * [where_scalar_subquery]: execute a scalar subquery /// * [where_in_subquery]: execute a subquery with an IN clause /// * [where_exist_subquery]: execute a subquery with an EXISTS clause -/// -/// # Querying data -/// -/// * [query_to_date]: execute queries against parquet files -#[tokio::main] -async fn main() -> Result<()> { +pub async fn dataframe_example() -> Result<()> { env_logger::init(); // The SessionContext is the main high level API for interacting with DataFusion let ctx = SessionContext::new(); @@ -199,7 +196,7 @@ async fn read_memory_macro() -> Result<()> { /// 2. Write out a DataFrame to a parquet file /// 3. Write out a DataFrame to a csv file /// 4. Write out a DataFrame to a json file -async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionError> { +async fn write_out(ctx: &SessionContext) -> Result<()> { let array = StringViewArray::from(vec!["a", "b", "c"]); let schema = Arc::new(Schema::new(vec![Field::new( "tablecol1", @@ -211,15 +208,26 @@ async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionEr ctx.register_table("initial_data", Arc::new(mem_table))?; let df = ctx.table("initial_data").await?; - ctx.sql( - "create external table - test(tablecol1 varchar) - stored as parquet - location './datafusion-examples/test_table/'", - ) - .await? 
- .collect() - .await?; + // Create a single temp root with subdirectories + let tmp_root = TempDir::new()?; + let examples_root = tmp_root.path().join("datafusion-examples"); + create_dir_all(&examples_root)?; + let table_dir = examples_root.join("test_table"); + let parquet_dir = examples_root.join("test_parquet"); + let csv_dir = examples_root.join("test_csv"); + let json_dir = examples_root.join("test_json"); + create_dir_all(&table_dir)?; + create_dir_all(&parquet_dir)?; + create_dir_all(&csv_dir)?; + create_dir_all(&json_dir)?; + + let create_sql = format!( + "CREATE EXTERNAL TABLE test(tablecol1 varchar) + STORED AS parquet + LOCATION '{}'", + table_dir.display() + ); + ctx.sql(&create_sql).await?.collect().await?; // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c'). // The behavior of write_table depends on the TableProvider's implementation @@ -230,7 +238,7 @@ async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionEr df.clone() .write_parquet( - "./datafusion-examples/test_parquet/", + parquet_dir.to_str().unwrap(), DataFrameWriteOptions::new(), None, ) @@ -238,7 +246,7 @@ async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionEr df.clone() .write_csv( - "./datafusion-examples/test_csv/", + csv_dir.to_str().unwrap(), // DataFrameWriteOptions contains options which control how data is written // such as compression codec DataFrameWriteOptions::new(), @@ -248,7 +256,7 @@ async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionEr df.clone() .write_json( - "./datafusion-examples/test_json/", + json_dir.to_str().unwrap(), DataFrameWriteOptions::new(), None, ) diff --git a/datafusion-examples/examples/deserialize_to_struct.rs b/datafusion-examples/examples/dataframe/deserialize_to_struct.rs similarity index 98% rename from datafusion-examples/examples/deserialize_to_struct.rs rename to datafusion-examples/examples/dataframe/deserialize_to_struct.rs index d6655b3b654f9..e19d45554131a 100644 --- a/datafusion-examples/examples/deserialize_to_struct.rs +++ b/datafusion-examples/examples/dataframe/deserialize_to_struct.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{AsArray, PrimitiveArray}; use arrow::datatypes::{Float64Type, Int32Type}; use datafusion::common::assert_batches_eq; @@ -29,8 +31,7 @@ use futures::StreamExt; /// as [ArrayRef] /// /// [ArrayRef]: arrow::array::ArrayRef -#[tokio::main] -async fn main() -> Result<()> { +pub async fn deserialize_to_struct() -> Result<()> { // Run a query that returns two columns of data let ctx = SessionContext::new(); let testdata = datafusion::test_util::parquet_test_data(); diff --git a/datafusion-examples/examples/dataframe/main.rs b/datafusion-examples/examples/dataframe/main.rs new file mode 100644 index 0000000000000..9a2604e97136d --- /dev/null +++ b/datafusion-examples/examples/dataframe/main.rs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Examples of core DataFrame API usage +//! +//! These examples demonstrate core DataFrame API usage. +//! +//! ## Usage +//! ```bash +//! cargo run --example dataframe -- [all|dataframe|deserialize_to_struct|cache_factory] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `dataframe` — run queries using the DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries +//! - `deserialize_to_struct` — convert query results (Arrow ArrayRefs) into Rust structs +//! - `cache_factory` — use a custom `CacheFactory` and query planner to implement lazy DataFrame caching + +mod cache_factory; +mod dataframe; +mod deserialize_to_struct; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + Dataframe, + DeserializeToStruct, + CacheFactory, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "dataframe"; + + fn runnable() -> impl Iterator<Item = Self> { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::Dataframe => { + dataframe::dataframe_example().await?; + } + ExampleKind::DeserializeToStruct => { + deserialize_to_struct::deserialize_to_struct().await?; + } + ExampleKind::CacheFactory => { + cache_factory::cache_dataframe_with_custom_logic().await?; + } + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/execution_monitoring/main.rs b/datafusion-examples/examples/execution_monitoring/main.rs new file mode 100644 index 0000000000000..3043a80363086 --- /dev/null +++ b/datafusion-examples/examples/execution_monitoring/main.rs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # These examples of memory and performance management +//! +//! These examples demonstrate memory and performance management. +//! +//! ## Usage +//! ```bash +//! cargo run --example execution_monitoring -- [all|mem_pool_exec_plan|mem_pool_tracking|tracing] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `mem_pool_exec_plan` — shows how to implement memory-aware ExecutionPlan with memory reservation and spilling +//! - `mem_pool_tracking` — demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages +//! - `tracing` — demonstrates the tracing injection feature for the DataFusion runtime + +mod memory_pool_execution_plan; +mod memory_pool_tracking; +mod tracing; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + MemPoolExecPlan, + MemPoolTracking, + Tracing, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "execution_monitoring"; + + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::MemPoolExecPlan => { + memory_pool_execution_plan::memory_pool_execution_plan().await? + } + ExampleKind::MemPoolTracking => { + memory_pool_tracking::mem_pool_tracking().await? + } + ExampleKind::Tracing => tracing::tracing().await?, + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/memory_pool_execution_plan.rs b/datafusion-examples/examples/execution_monitoring/memory_pool_execution_plan.rs similarity index 97% rename from datafusion-examples/examples/memory_pool_execution_plan.rs rename to datafusion-examples/examples/execution_monitoring/memory_pool_execution_plan.rs index 3258cde17625f..48475acbb1542 100644 --- a/datafusion-examples/examples/memory_pool_execution_plan.rs +++ b/datafusion-examples/examples/execution_monitoring/memory_pool_execution_plan.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example demonstrates how to implement custom ExecutionPlans that properly //! use memory tracking through TrackConsumersPool. //! 
@@ -28,7 +30,7 @@ use arrow::record_batch::RecordBatch; use arrow_schema::SchemaRef; use datafusion::common::record_batch; use datafusion::common::{exec_datafusion_err, internal_err}; -use datafusion::datasource::{memory::MemTable, DefaultTableSource}; +use datafusion::datasource::{DefaultTableSource, memory::MemTable}; use datafusion::error::Result; use datafusion::execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion::execution::runtime_env::RuntimeEnvBuilder; @@ -44,8 +46,8 @@ use std::any::Any; use std::fmt; use std::sync::Arc; -#[tokio::main] -async fn main() -> Result<(), Box> { +/// Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling +pub async fn memory_pool_execution_plan() -> Result<()> { println!("=== DataFusion ExecutionPlan Memory Tracking Example ===\n"); // Set up a runtime with memory tracking @@ -140,6 +142,7 @@ impl ExternalBatchBufferer { } } + #[expect(clippy::needless_pass_by_value)] fn add_batch(&mut self, batch_data: Vec) -> Result<()> { let additional_memory = batch_data.len(); diff --git a/datafusion-examples/examples/memory_pool_tracking.rs b/datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs similarity index 95% rename from datafusion-examples/examples/memory_pool_tracking.rs rename to datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs index d5823b1173ab3..8d6e5dd7e444d 100644 --- a/datafusion-examples/examples/memory_pool_tracking.rs +++ b/datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example demonstrates how to use TrackConsumersPool for memory tracking and debugging. //! //! The TrackConsumersPool provides enhanced error messages that show the top memory consumers @@ -24,11 +26,12 @@ //! //! * [`automatic_usage_example`]: Shows how to use RuntimeEnvBuilder to automatically enable memory tracking +use datafusion::error::Result; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::prelude::*; -#[tokio::main] -async fn main() -> Result<(), Box> { +/// Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages +pub async fn mem_pool_tracking() -> Result<()> { println!("=== DataFusion Memory Pool Tracking Example ===\n"); // Example 1: Automatic Usage with RuntimeEnvBuilder @@ -41,7 +44,7 @@ async fn main() -> Result<(), Box> { /// /// This shows the recommended way to use TrackConsumersPool through RuntimeEnvBuilder, /// which automatically creates a TrackConsumersPool with sensible defaults. -async fn automatic_usage_example() -> datafusion::error::Result<()> { +async fn automatic_usage_example() -> Result<()> { println!("Example 1: Automatic Usage with RuntimeEnvBuilder"); println!("------------------------------------------------"); diff --git a/datafusion-examples/examples/tracing.rs b/datafusion-examples/examples/execution_monitoring/tracing.rs similarity index 92% rename from datafusion-examples/examples/tracing.rs rename to datafusion-examples/examples/execution_monitoring/tracing.rs index 334ee0f4e5686..5fa759f2d541d 100644 --- a/datafusion-examples/examples/tracing.rs +++ b/datafusion-examples/examples/execution_monitoring/tracing.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! 
This example demonstrates the tracing injection feature for the DataFusion runtime. //! Tasks spawned on new threads behave differently depending on whether a tracer is injected. //! The log output clearly distinguishes the two cases. @@ -49,20 +51,20 @@ //! 10:29:40.809 INFO main ThreadId(01) tracing: ***** WITH tracer: Non-main tasks DID inherit the `run_instrumented_query` span ***** //! ``` -use datafusion::common::runtime::{set_join_set_tracer, JoinSetTracer}; +use datafusion::common::runtime::{JoinSetTracer, set_join_set_tracer}; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::ListingOptions; use datafusion::error::Result; use datafusion::prelude::*; use datafusion::test_util::parquet_test_data; -use futures::future::BoxFuture; use futures::FutureExt; +use futures::future::BoxFuture; use std::any::Any; use std::sync::Arc; -use tracing::{info, instrument, Instrument, Level, Span}; +use tracing::{Instrument, Level, Span, info, instrument}; -#[tokio::main] -async fn main() -> Result<()> { +/// Demonstrates the tracing injection feature for the DataFusion runtime +pub async fn tracing() -> Result<()> { // Initialize tracing subscriber with thread info. tracing_subscriber::fmt() .with_thread_ids(true) @@ -73,7 +75,9 @@ async fn main() -> Result<()> { // Run query WITHOUT tracer injection. info!("***** RUNNING WITHOUT INJECTED TRACER *****"); run_instrumented_query().await?; - info!("***** WITHOUT tracer: `tokio-runtime-worker` tasks did NOT inherit the `run_instrumented_query` span *****"); + info!( + "***** WITHOUT tracer: `tokio-runtime-worker` tasks did NOT inherit the `run_instrumented_query` span *****" + ); // Inject custom tracer so tasks run in the current span. info!("Injecting custom tracer..."); @@ -82,7 +86,9 @@ async fn main() -> Result<()> { // Run query WITH tracer injection. info!("***** RUNNING WITH INJECTED TRACER *****"); run_instrumented_query().await?; - info!("***** WITH tracer: `tokio-runtime-worker` tasks DID inherit the `run_instrumented_query` span *****"); + info!( + "***** WITH tracer: `tokio-runtime-worker` tasks DID inherit the `run_instrumented_query` span *****" + ); Ok(()) } diff --git a/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs b/datafusion-examples/examples/external_dependency/dataframe_to_s3.rs similarity index 87% rename from datafusion-examples/examples/external_dependency/dataframe-to-s3.rs rename to datafusion-examples/examples/external_dependency/dataframe_to_s3.rs index e75ba5dd5328a..fdb8a3c9c051a 100644 --- a/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs +++ b/datafusion-examples/examples/external_dependency/dataframe_to_s3.rs @@ -15,12 +15,14 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
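The tracing example above revolves around one fact: a span is not inherited across `tokio::spawn` unless the spawned future is explicitly instrumented, which is what the injected tracer does for DataFusion's internal tasks. A stand-alone sketch of that behaviour using only `tokio` and `tracing`, without DataFusion's tracer injection:

```rust
use tracing::{info, instrument, Instrument, Span};

#[instrument]
async fn run_query() {
    // Spawned without instrumentation: the task runs outside `run_query`'s
    // span, so its log line carries no span name.
    tokio::spawn(async { info!("outside the span") }).await.unwrap();

    // Explicitly attaching the current span restores the context; DataFusion's
    // injected JoinSetTracer achieves the same effect for its spawned tasks.
    tokio::spawn(async { info!("inside the span") }.instrument(Span::current()))
        .await
        .unwrap();
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt().with_thread_ids(true).init();
    run_query().await;
}
```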
+ use std::env; use std::sync::Arc; use datafusion::dataframe::DataFrameWriteOptions; -use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::ListingOptions; use datafusion::error::Result; use datafusion::prelude::*; @@ -28,14 +30,18 @@ use datafusion::prelude::*; use object_store::aws::AmazonS3Builder; use url::Url; -/// This example demonstrates querying data from AmazonS3 and writing -/// the result of a query back to AmazonS3 -#[tokio::main] -async fn main() -> Result<()> { +/// This example demonstrates querying data from Amazon S3 and writing +/// the result of a query back to Amazon S3. +/// +/// The following environment variables must be defined: +/// +/// - AWS_ACCESS_KEY_ID +/// - AWS_SECRET_ACCESS_KEY +pub async fn dataframe_to_s3() -> Result<()> { // create local execution context let ctx = SessionContext::new(); - //enter region and bucket to which your credentials have GET and PUT access + // enter region and bucket to which your credentials have GET and PUT access let region = ""; let bucket_name = ""; @@ -66,13 +72,13 @@ async fn main() -> Result<()> { .write_parquet(&out_path, DataFrameWriteOptions::new(), None) .await?; - //write as JSON to s3 + // write as JSON to s3 let json_out = format!("s3://{bucket_name}/json_out"); df.clone() .write_json(&json_out, DataFrameWriteOptions::new(), None) .await?; - //write as csv to s3 + // write as csv to s3 let csv_out = format!("s3://{bucket_name}/csv_out"); df.write_csv(&csv_out, DataFrameWriteOptions::new(), None) .await?; diff --git a/datafusion-examples/examples/external_dependency/main.rs b/datafusion-examples/examples/external_dependency/main.rs new file mode 100644 index 0000000000000..abcba61421bdb --- /dev/null +++ b/datafusion-examples/examples/external_dependency/main.rs @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Amazon S3 examples +//! +//! These examples demonstrate how to work with data from Amazon S3. +//! +//! ## Usage +//! ```bash +//! cargo run --example external_dependency -- [all|dataframe_to_s3|query_aws_s3] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `dataframe_to_s3` — run a DataFrame query against a parquet file in AWS S3 and write the results back to AWS S3 +//!
- `query_aws_s3` — configure `object_store` and run a query against files stored in AWS S3 + +mod dataframe_to_s3; +mod query_aws_s3; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + DataframeToS3, + QueryAwsS3, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "external_dependency"; + + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::DataframeToS3 => dataframe_to_s3::dataframe_to_s3().await?, + ExampleKind::QueryAwsS3 => query_aws_s3::query_aws_s3().await?, + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/external_dependency/query-aws-s3.rs b/datafusion-examples/examples/external_dependency/query_aws_s3.rs similarity index 90% rename from datafusion-examples/examples/external_dependency/query-aws-s3.rs rename to datafusion-examples/examples/external_dependency/query_aws_s3.rs index cd0b4562d5f2d..63507bb3eed11 100644 --- a/datafusion-examples/examples/external_dependency/query-aws-s3.rs +++ b/datafusion-examples/examples/external_dependency/query_aws_s3.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::error::Result; use datafusion::prelude::*; use object_store::aws::AmazonS3Builder; @@ -22,14 +24,13 @@ use std::env; use std::sync::Arc; use url::Url; -/// This example demonstrates querying data in an S3 bucket. +/// This example demonstrates querying data in a public S3 bucket +/// (the NYC TLC open dataset: `s3://nyc-tlc`). 
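Both S3 examples in this module follow the same wiring: build an `object_store` S3 client, register it for the bucket's URL, then treat `s3://...` paths like any other table location. A hedged sketch of that wiring; the bucket name, region and table path are placeholders, and credentials come from the usual AWS environment variables:

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::prelude::*;
use object_store::aws::AmazonS3Builder;
use url::Url;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Placeholder bucket; from_env() picks up AWS_ACCESS_KEY_ID and
    // AWS_SECRET_ACCESS_KEY from the environment.
    let bucket = "my-bucket";
    let s3 = AmazonS3Builder::from_env()
        .with_bucket_name(bucket)
        .with_region("us-east-1")
        .build()?;

    // Tell the context how to resolve s3://my-bucket/... URLs.
    let s3_url = Url::parse(&format!("s3://{bucket}")).expect("valid bucket URL");
    ctx.register_object_store(&s3_url, Arc::new(s3));

    // Once registered, S3 paths behave like local ones.
    ctx.register_parquet(
        "trips",
        &format!("s3://{bucket}/data/"),
        ParquetReadOptions::default(),
    )
    .await?;
    ctx.sql("SELECT count(*) FROM trips").await?.show().await?;
    Ok(())
}
```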
/// /// The following environment variables must be defined: -/// -/// - AWS_ACCESS_KEY_ID -/// - AWS_SECRET_ACCESS_KEY -#[tokio::main] -async fn main() -> Result<()> { +/// - `AWS_ACCESS_KEY_ID` +/// - `AWS_SECRET_ACCESS_KEY` +pub async fn query_aws_s3() -> Result<()> { let ctx = SessionContext::new(); // the region must be set to the region where the bucket exists until the following diff --git a/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs b/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs index a83f15926f054..eb217ef9e4832 100644 --- a/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs +++ b/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs @@ -21,6 +21,7 @@ use abi_stable::{export_root_module, prefix_type::PrefixTypeTrait}; use arrow::array::RecordBatch; use arrow::datatypes::{DataType, Field, Schema}; use datafusion::{common::record_batch, datasource::MemTable}; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; use ffi_module_interface::{TableProviderModule, TableProviderModuleRef}; @@ -34,7 +35,9 @@ fn create_record_batch(start_value: i32, num_values: usize) -> RecordBatch { /// Here we only wish to create a simple table provider as an example. /// We create an in-memory table and convert it to it's FFI counterpart. -extern "C" fn construct_simple_table_provider() -> FFI_TableProvider { +extern "C" fn construct_simple_table_provider( + codec: FFI_LogicalExtensionCodec, +) -> FFI_TableProvider { let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Int32, true), Field::new("b", DataType::Float64, true), @@ -50,7 +53,7 @@ extern "C" fn construct_simple_table_provider() -> FFI_TableProvider { let table_provider = MemTable::try_new(schema, vec![batches]).unwrap(); - FFI_TableProvider::new(Arc::new(table_provider), true, None) + FFI_TableProvider::new_with_ffi_codec(Arc::new(table_provider), true, None, codec) } #[export_root_module] diff --git a/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml b/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml index 612a219324763..f393b2971e454 100644 --- a/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml +++ b/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ffi_module_interface" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [dependencies] diff --git a/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs b/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs index 88690e9297135..3b2b9e1871dae 100644 --- a/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs +++ b/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs @@ -16,12 +16,12 @@ // under the License. use abi_stable::{ - declare_root_module_statics, + StableAbi, declare_root_module_statics, library::{LibraryError, RootModule}, package_version_strings, sabi_types::VersionStrings, - StableAbi, }; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; #[repr(C)] @@ -34,7 +34,8 @@ use datafusion_ffi::table_provider::FFI_TableProvider; /// how a user may wish to separate these concerns. 
pub struct TableProviderModule { /// Constructs the table provider - pub create_table: extern "C" fn() -> FFI_TableProvider, + pub create_table: + extern "C" fn(codec: FFI_LogicalExtensionCodec) -> FFI_TableProvider, } impl RootModule for TableProviderModuleRef { diff --git a/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml b/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml index 028a366aab1c0..823c9afddee2a 100644 --- a/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml +++ b/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ffi_module_loader" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [dependencies] diff --git a/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs b/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs index 6e376ca866e8f..8ce5b156df3b1 100644 --- a/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs +++ b/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs @@ -22,8 +22,10 @@ use datafusion::{ prelude::SessionContext, }; -use abi_stable::library::{development_utils::compute_library_path, RootModule}; -use datafusion_ffi::table_provider::ForeignTableProvider; +use abi_stable::library::{RootModule, development_utils::compute_library_path}; +use datafusion::datasource::TableProvider; +use datafusion::execution::TaskContextProvider; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use ffi_module_interface::TableProviderModuleRef; #[tokio::main] @@ -39,6 +41,11 @@ async fn main() -> Result<()> { TableProviderModuleRef::load_from_directory(&library_path) .map_err(|e| DataFusionError::External(Box::new(e)))?; + let ctx = Arc::new(SessionContext::new()); + let codec = FFI_LogicalExtensionCodec::new_default( + &(Arc::clone(&ctx) as Arc), + ); + // By calling the code below, the table provided will be created within // the module's code. let ffi_table_provider = @@ -46,16 +53,14 @@ async fn main() -> Result<()> { .create_table() .ok_or(DataFusionError::NotImplemented( "External table provider failed to implement create_table".to_string(), - ))?(); + ))?(codec); // In order to access the table provider within this executable, we need to - // turn it into a `ForeignTableProvider`. - let foreign_table_provider: ForeignTableProvider = (&ffi_table_provider).into(); - - let ctx = SessionContext::new(); + // turn it into a `TableProvider`. + let foreign_table_provider: Arc = (&ffi_table_provider).into(); // Display the data to show the full cycle works. - ctx.register_table("external_table", Arc::new(foreign_table_provider))?; + ctx.register_table("external_table", foreign_table_provider)?; let df = ctx.table("external_table").await?; df.show().await?; diff --git a/datafusion-examples/examples/flight/client.rs b/datafusion-examples/examples/flight/client.rs index 031beea47d57a..484576975a6f2 100644 --- a/datafusion-examples/examples/flight/client.rs +++ b/datafusion-examples/examples/flight/client.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::collections::HashMap; use std::sync::Arc; use tonic::transport::Endpoint; diff --git a/datafusion-examples/examples/flight/main.rs b/datafusion-examples/examples/flight/main.rs index a448789b353b9..25965a3011c60 100644 --- a/datafusion-examples/examples/flight/main.rs +++ b/datafusion-examples/examples/flight/main.rs @@ -19,7 +19,16 @@ //! //! 
These examples demonstrate Arrow Flight usage. //! +//! ## Usage +//! ```bash +//! cargo run --example flight -- [all|client|server|sql_server] +//! ``` +//! //! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! Note: The Flight server must be started in a separate process +//! before running the `client` example. Therefore, running `all` will +//! not produce a full server+client workflow automatically. //! - `client` — run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol //! - `server` — run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol //! - `sql_server` — run DataFusion as a standalone process and execute SQL queries from JDBC clients @@ -28,46 +37,43 @@ mod client; mod server; mod sql_server; -use std::str::FromStr; - use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; +/// The `all` option cannot run all examples end-to-end because the +/// `server` example must run in a separate process before the `client` +/// example can connect. +/// Therefore, `all` only iterates over individually runnable examples. +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] enum ExampleKind { + All, Client, Server, SqlServer, } -impl AsRef for ExampleKind { - fn as_ref(&self) -> &str { - match self { - Self::Client => "client", - Self::Server => "server", - Self::SqlServer => "sql_server", - } - } -} - -impl FromStr for ExampleKind { - type Err = DataFusionError; - - fn from_str(s: &str) -> Result { - match s { - "client" => Ok(Self::Client), - "server" => Ok(Self::Server), - "sql_server" => Ok(Self::SqlServer), - _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), - } - } -} - impl ExampleKind { - const ALL: [Self; 3] = [Self::Client, Self::Server, Self::SqlServer]; - const EXAMPLE_NAME: &str = "flight"; - fn variants() -> Vec<&'static str> { - Self::ALL.iter().map(|x| x.as_ref()).collect() + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<(), Box> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::Client => client::client().await?, + ExampleKind::Server => server::server().await?, + ExampleKind::SqlServer => sql_server::sql_server().await?, + } + Ok(()) } } @@ -76,19 +82,14 @@ async fn main() -> Result<(), Box> { let usage = format!( "Usage: cargo run --example {} -- [{}]", ExampleKind::EXAMPLE_NAME, - ExampleKind::variants().join("|") + ExampleKind::VARIANTS.join("|") ); - let arg = std::env::args().nth(1).ok_or_else(|| { - eprintln!("{usage}"); - DataFusionError::Execution("Missing argument".to_string()) - })?; - - match arg.parse::()? { - ExampleKind::Client => client::client().await?, - ExampleKind::Server => server::server().await?, - ExampleKind::SqlServer => sql_server::sql_server().await?, - } + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. 
{usage}")))?; - Ok(()) + example.run().await } diff --git a/datafusion-examples/examples/flight/server.rs b/datafusion-examples/examples/flight/server.rs index dc75287cf2e2b..aad82e28b15ef 100644 --- a/datafusion-examples/examples/flight/server.rs +++ b/datafusion-examples/examples/flight/server.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator}; use std::sync::Arc; @@ -29,9 +31,9 @@ use tonic::{Request, Response, Status, Streaming}; use datafusion::prelude::*; use arrow_flight::{ - flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, + flight_service_server::FlightService, flight_service_server::FlightServiceServer, }; #[derive(Clone)] @@ -187,6 +189,7 @@ impl FlightService for FlightServiceImpl { } } +#[expect(clippy::needless_pass_by_value)] fn to_tonic_err(e: datafusion::error::DataFusionError) -> Status { Status::internal(format!("{e:?}")) } diff --git a/datafusion-examples/examples/flight/sql_server.rs b/datafusion-examples/examples/flight/sql_server.rs index d86860f9d4364..435e05ffc0cec 100644 --- a/datafusion-examples/examples/flight/sql_server.rs +++ b/datafusion-examples/examples/flight/sql_server.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::ipc::writer::IpcWriteOptions; @@ -414,7 +416,9 @@ impl FlightSqlService for FlightSqlServiceImpl { ) -> Result<(), Status> { let handle = std::str::from_utf8(&handle.prepared_statement_handle); if let Ok(handle) = handle { - info!("do_action_close_prepared_statement: removing plan and results for {handle}"); + info!( + "do_action_close_prepared_statement: removing plan and results for {handle}" + ); let _ = self.remove_plan(handle); let _ = self.remove_result(handle); } diff --git a/datafusion-examples/examples/composed_extension_codec.rs b/datafusion-examples/examples/proto/composed_extension_codec.rs similarity index 95% rename from datafusion-examples/examples/composed_extension_codec.rs rename to datafusion-examples/examples/proto/composed_extension_codec.rs index 57f2c370413aa..f3910d461b6a8 100644 --- a/datafusion-examples/examples/composed_extension_codec.rs +++ b/datafusion-examples/examples/proto/composed_extension_codec.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example demonstrates how to compose multiple PhysicalExtensionCodecs //! //! 
This can be helpful when an Execution plan tree has different nodes from different crates @@ -34,8 +36,8 @@ use std::any::Any; use std::fmt::Debug; use std::sync::Arc; -use datafusion::common::internal_err; use datafusion::common::Result; +use datafusion::common::internal_err; use datafusion::execution::TaskContext; use datafusion::physical_plan::{DisplayAs, ExecutionPlan}; use datafusion::prelude::SessionContext; @@ -44,8 +46,8 @@ use datafusion_proto::physical_plan::{ }; use datafusion_proto::protobuf; -#[tokio::main] -async fn main() { +/// Example of using multiple extension codecs for serialization / deserialization +pub async fn composed_extension_codec() -> Result<()> { // build execution plan that has both types of nodes // // Note each node requires a different `PhysicalExtensionCodec` to decode @@ -66,16 +68,16 @@ async fn main() { protobuf::PhysicalPlanNode::try_from_physical_plan( exec_plan.clone(), &composed_codec, - ) - .expect("to proto"); + )?; // deserialize proto back to execution plan - let result_exec_plan: Arc = proto - .try_into_physical_plan(&ctx.task_ctx(), &composed_codec) - .expect("from proto"); + let result_exec_plan: Arc = + proto.try_into_physical_plan(&ctx.task_ctx(), &composed_codec)?; // assert that the original and deserialized execution plans are equal assert_eq!(format!("{exec_plan:?}"), format!("{result_exec_plan:?}")); + + Ok(()) } /// This example has two types of nodes: `ParentExec` and `ChildExec` which can only diff --git a/datafusion-examples/examples/proto/main.rs b/datafusion-examples/examples/proto/main.rs new file mode 100644 index 0000000000000..9e4ae728206c4 --- /dev/null +++ b/datafusion-examples/examples/proto/main.rs @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Examples demonstrating DataFusion's plan serialization via the `datafusion-proto` crate +//! +//! These examples show how to use multiple extension codecs for serialization / deserialization. +//! +//! ## Usage +//! ```bash +//! cargo run --example proto -- [all|composed_extension_codec] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! 
- `composed_extension_codec` — example of using multiple extension codecs for serialization / deserialization + +mod composed_extension_codec; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + ComposedExtensionCodec, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "proto"; + + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::ComposedExtensionCodec => { + composed_extension_codec::composed_extension_codec().await? + } + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/analyzer_rule.rs b/datafusion-examples/examples/query_planning/analyzer_rule.rs similarity index 97% rename from datafusion-examples/examples/analyzer_rule.rs rename to datafusion-examples/examples/query_planning/analyzer_rule.rs index cb81cd167a88b..a86f5cdd2a5e3 100644 --- a/datafusion-examples/examples/analyzer_rule.rs +++ b/datafusion-examples/examples/query_planning/analyzer_rule.rs @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; +use datafusion::common::Result; use datafusion::common::config::ConfigOptions; use datafusion::common::tree_node::{Transformed, TreeNode}; -use datafusion::common::Result; -use datafusion::logical_expr::{col, lit, Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion::logical_expr::{Expr, LogicalPlan, LogicalPlanBuilder, col, lit}; use datafusion::optimizer::analyzer::AnalyzerRule; use datafusion::prelude::SessionContext; use std::sync::{Arc, Mutex}; @@ -35,8 +37,7 @@ use std::sync::{Arc, Mutex}; /// level access control scheme by introducing a filter to the query. /// /// See [optimizer_rule.rs] for an example of a optimizer rule -#[tokio::main] -pub async fn main() -> Result<()> { +pub async fn analyzer_rule() -> Result<()> { // AnalyzerRules run before OptimizerRules. // // DataFusion includes several built in AnalyzerRules for tasks such as type diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/query_planning/expr_api.rs similarity index 97% rename from datafusion-examples/examples/expr_api.rs rename to datafusion-examples/examples/query_planning/expr_api.rs index 56f960870e58a..47de669023f7c 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/query_planning/expr_api.rs @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
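All of the new per-module `main.rs` dispatchers above lean on the same strum derives, which is why the hand-written `AsRef`/`FromStr` boilerplate the flight example previously carried can be deleted. A small stand-alone sketch of what each derive contributes, with the variant set trimmed for brevity:

```rust
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{Display, EnumIter, EnumString, VariantNames};

#[derive(Debug, PartialEq, EnumIter, EnumString, Display, VariantNames)]
#[strum(serialize_all = "snake_case")]
enum ExampleKind {
    All,
    ComposedExtensionCodec,
}

fn main() {
    // VariantNames: the snake_case names joined into the usage string.
    assert_eq!(ExampleKind::VARIANTS, ["all", "composed_extension_codec"]);

    // EnumString: FromStr for parsing the CLI argument.
    let parsed: ExampleKind = "composed_extension_codec".parse().unwrap();
    assert_eq!(parsed, ExampleKind::ComposedExtensionCodec);

    // Display: what `println!("Running example: {example}")` prints.
    assert_eq!(ExampleKind::All.to_string(), "all");

    // EnumIter: iterate every variant, skipping `All`, to build `runnable()`.
    let runnable: Vec<_> = ExampleKind::iter()
        .filter(|v| !matches!(v, ExampleKind::All))
        .collect();
    assert_eq!(runnable, [ExampleKind::ComposedExtensionCodec]);
}
```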
+ use std::collections::HashMap; use std::sync::Arc; -use arrow::array::{BooleanArray, Int32Array, Int8Array}; +use arrow::array::{BooleanArray, Int8Array, Int32Array}; use arrow::record_batch::RecordBatch; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; @@ -35,7 +37,7 @@ use datafusion::logical_expr::simplify::SimplifyContext; use datafusion::logical_expr::{ColumnarValue, ExprFunctionExt, ExprSchemable, Operator}; use datafusion::optimizer::analyzer::type_coercion::TypeCoercionRewriter; use datafusion::optimizer::simplify_expressions::ExprSimplifier; -use datafusion::physical_expr::{analyze, AnalysisContext, ExprBoundaries}; +use datafusion::physical_expr::{AnalysisContext, ExprBoundaries, analyze}; use datafusion::prelude::*; /// This example demonstrates the DataFusion [`Expr`] API. @@ -55,8 +57,7 @@ use datafusion::prelude::*; /// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`] /// 6. Get the types of the expressions: [`expression_type_demo`] /// 7. Apply type coercion to expressions: [`type_coercion_demo`] -#[tokio::main] -async fn main() -> Result<()> { +pub async fn expr_api() -> Result<()> { // The easiest way to do create expressions is to use the // "fluent"-style API: let expr = col("a") + lit(5); @@ -302,6 +303,7 @@ fn boundary_analysis_and_selectivity_demo() -> Result<()> { min_value: Precision::Exact(ScalarValue::Int64(Some(1))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Absent, }; // We can then build our expression boundaries from the column statistics @@ -342,9 +344,11 @@ fn boundary_analysis_and_selectivity_demo() -> Result<()> { // // (a' - b' + 1) / (a - b) // (10000 - 5000 + 1) / (10000 - 1) - assert!(analysis - .selectivity - .is_some_and(|selectivity| (0.5..=0.6).contains(&selectivity))); + assert!( + analysis + .selectivity + .is_some_and(|selectivity| (0.5..=0.6).contains(&selectivity)) + ); Ok(()) } @@ -369,6 +373,7 @@ fn boundary_analysis_in_conjunctions_demo() -> Result<()> { min_value: Precision::Exact(ScalarValue::Int64(Some(14))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Absent, }; let initial_boundaries = @@ -414,9 +419,11 @@ fn boundary_analysis_in_conjunctions_demo() -> Result<()> { // // Granted a column such as age will more likely follow a Normal distribution // as such our selectivity estimation will not be as good as it can. - assert!(analysis - .selectivity - .is_some_and(|selectivity| (0.1..=0.2).contains(&selectivity))); + assert!( + analysis + .selectivity + .is_some_and(|selectivity| (0.1..=0.2).contains(&selectivity)) + ); // The above example was a good way to look at how we can derive better // interval and get a lower selectivity during boundary analysis. @@ -532,10 +539,11 @@ fn type_coercion_demo() -> Result<()> { let physical_expr = datafusion::physical_expr::create_physical_expr(&expr, &df_schema, &props)?; let e = physical_expr.evaluate(&batch).unwrap_err(); - assert!(e - .find_root() - .to_string() - .contains("Invalid comparison operation: Int8 > Int32")); + assert!( + e.find_root() + .to_string() + .contains("Invalid comparison operation: Int8 > Int32") + ); // 1. Type coercion with `SessionContext::create_physical_expr` which implicitly applies type coercion before constructing the physical expr. 
let physical_expr = diff --git a/datafusion-examples/examples/query_planning/main.rs b/datafusion-examples/examples/query_planning/main.rs new file mode 100644 index 0000000000000..247f468735359 --- /dev/null +++ b/datafusion-examples/examples/query_planning/main.rs @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Query planning and optimization examples +//! +//! These examples demonstrate internal mechanics of the query planning and optimization layers. +//! +//! ## Usage +//! ```bash +//! cargo run --example query_planning -- [all|analyzer_rule|expr_api|optimizer_rule|parse_sql_expr|plan_to_sql|planner_api|pruning|thread_pools] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `analyzer_rule` — use a custom AnalyzerRule to change a query's semantics (row level access control) +//! - `expr_api` — create, execute, simplify, analyze and coerce `Expr`s +//! - `optimizer_rule` — use a custom OptimizerRule to replace certain predicates +//! - `parse_sql_expr` — parse SQL text into DataFusion `Expr` +//! - `plan_to_sql` — generate SQL from DataFusion `Expr` and `LogicalPlan` +//! - `planner_api` — APIs to manipulate logical and physical plans +//! - `pruning` — use PruningPredicate and statistics to prove that files cannot contain matching rows and skip them +//!
- `thread_pools` — use separate thread pools (tokio Runtimes) to run the IO and CPU intensive parts of DataFusion plans + +mod analyzer_rule; +mod expr_api; +mod optimizer_rule; +mod parse_sql_expr; +mod plan_to_sql; +mod planner_api; +mod pruning; +mod thread_pools; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + AnalyzerRule, + ExprApi, + OptimizerRule, + ParseSqlExpr, + PlanToSql, + PlannerApi, + Pruning, + ThreadPools, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "query_planning"; + + fn runnable() -> impl Iterator<Item = Self> { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::AnalyzerRule => analyzer_rule::analyzer_rule().await?, + ExampleKind::ExprApi => expr_api::expr_api().await?, + ExampleKind::OptimizerRule => optimizer_rule::optimizer_rule().await?, + ExampleKind::ParseSqlExpr => parse_sql_expr::parse_sql_expr().await?, + ExampleKind::PlanToSql => plan_to_sql::plan_to_sql_examples().await?, + ExampleKind::PlannerApi => planner_api::planner_api().await?, + ExampleKind::Pruning => pruning::pruning().await?, + ExampleKind::ThreadPools => thread_pools::thread_pools().await?, + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/query_planning/optimizer_rule.rs similarity index 98% rename from datafusion-examples/examples/optimizer_rule.rs rename to datafusion-examples/examples/query_planning/optimizer_rule.rs index 9c137b67432c5..de9de7737a6a0 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/query_planning/optimizer_rule.rs @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow::datatypes::DataType; use datafusion::common::tree_node::{Transformed, TreeNode}; -use datafusion::common::{assert_batches_eq, Result, ScalarValue}; +use datafusion::common::{Result, ScalarValue, assert_batches_eq}; use datafusion::logical_expr::{ BinaryExpr, ColumnarValue, Expr, LogicalPlan, Operator, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, @@ -37,8 +39,7 @@ use std::sync::Arc; /// /// See [analyzer_rule.rs] for an example of AnalyzerRules, which are for /// changing plan semantics. -#[tokio::main] -pub async fn main() -> Result<()> { +pub async fn optimizer_rule() -> Result<()> { // DataFusion includes many built in OptimizerRules for tasks such as outer // to inner join conversion and constant folding.
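As a quick way to see the built-in rules mentioned here in action before a custom rule is added, comparing the unoptimized and optimized plans of a query with constant arithmetic shows the simplify/constant-folding pass rewriting the predicate. This is a sketch separate from the example's own code; the table is ad hoc:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.sql("CREATE TABLE t AS VALUES (1), (2), (3)")
        .await?
        .collect()
        .await?;

    // The optimized plan should show the filter folded to `column1 > 2`,
    // while the unoptimized plan still contains `1 + 1`.
    let df = ctx
        .sql("SELECT column1 FROM t WHERE column1 > 1 + 1")
        .await?;
    println!("unoptimized:\n{}", df.logical_plan().display_indent());
    println!(
        "optimized:\n{}",
        df.clone().into_optimized_plan()?.display_indent()
    );
    Ok(())
}
```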
// diff --git a/datafusion-examples/examples/parse_sql_expr.rs b/datafusion-examples/examples/query_planning/parse_sql_expr.rs similarity index 96% rename from datafusion-examples/examples/parse_sql_expr.rs rename to datafusion-examples/examples/query_planning/parse_sql_expr.rs index 5387e7c4a05dc..376120de9d492 100644 --- a/datafusion-examples/examples/parse_sql_expr.rs +++ b/datafusion-examples/examples/query_planning/parse_sql_expr.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::{DataType, Field, Schema}; use datafusion::common::DFSchema; use datafusion::logical_expr::{col, lit}; @@ -32,17 +34,15 @@ use datafusion::{ /// The code in this example shows how to: /// /// 1. [`simple_session_context_parse_sql_expr_demo`]: Parse a simple SQL text into a logical -/// expression using a schema at [`SessionContext`]. +/// expression using a schema at [`SessionContext`]. /// /// 2. [`simple_dataframe_parse_sql_expr_demo`]: Parse a simple SQL text into a logical expression -/// using a schema at [`DataFrame`]. +/// using a schema at [`DataFrame`]. /// /// 3. [`query_parquet_demo`]: Query a parquet file using the parsed_sql_expr from a DataFrame. /// /// 4. [`round_trip_parse_sql_expr_demo`]: Parse a SQL text and convert it back to SQL using [`Unparser`]. - -#[tokio::main] -async fn main() -> Result<()> { +pub async fn parse_sql_expr() -> Result<()> { // See how to evaluate expressions simple_session_context_parse_sql_expr_demo()?; simple_dataframe_parse_sql_expr_demo().await?; diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/query_planning/plan_to_sql.rs similarity index 95% rename from datafusion-examples/examples/plan_to_sql.rs rename to datafusion-examples/examples/query_planning/plan_to_sql.rs index 54483b143a169..756cc80b8f3c7 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/query_planning/plan_to_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::common::DFSchemaRef; use datafusion::error::Result; use datafusion::logical_expr::sqlparser::ast::Statement; @@ -32,7 +34,7 @@ use datafusion::sql::unparser::extension_unparser::UserDefinedLogicalNodeUnparse use datafusion::sql::unparser::extension_unparser::{ UnparseToStatementResult, UnparseWithinStatementResult, }; -use datafusion::sql::unparser::{plan_to_sql, Unparser}; +use datafusion::sql::unparser::{Unparser, plan_to_sql}; use std::fmt; use std::sync::Arc; @@ -43,28 +45,26 @@ use std::sync::Arc; /// The code in this example shows how to: /// /// 1. [`simple_expr_to_sql_demo`]: Create a simple expression [`Exprs`] with -/// fluent API and convert to sql suitable for passing to another database +/// fluent API and convert to sql suitable for passing to another database /// /// 2. [`simple_expr_to_pretty_sql_demo`] Create a simple expression -/// [`Exprs`] with fluent API and convert to sql without extra parentheses, -/// suitable for displaying to humans +/// [`Exprs`] with fluent API and convert to sql without extra parentheses, +/// suitable for displaying to humans /// /// 3. [`simple_expr_to_sql_demo_escape_mysql_style`]" Create a simple -/// expression [`Exprs`] with fluent API and convert to sql escaping column -/// names in MySQL style. 
+/// expression [`Exprs`] with fluent API and convert to sql escaping column +/// names in MySQL style. /// /// 4. [`simple_plan_to_sql_demo`]: Create a simple logical plan using the -/// DataFrames API and convert to sql string. +/// DataFrames API and convert to sql string. /// /// 5. [`round_trip_plan_to_sql_demo`]: Create a logical plan from a SQL string, modify it using the -/// DataFrames API and convert it back to a sql string. +/// DataFrames API and convert it back to a sql string. /// /// 6. [`unparse_my_logical_plan_as_statement`]: Create a custom logical plan and unparse it as a statement. /// /// 7. [`unparse_my_logical_plan_as_subquery`]: Create a custom logical plan and unparse it as a subquery. - -#[tokio::main] -async fn main() -> Result<()> { +pub async fn plan_to_sql_examples() -> Result<()> { // See how to evaluate expressions simple_expr_to_sql_demo()?; simple_expr_to_pretty_sql_demo()?; diff --git a/datafusion-examples/examples/planner_api.rs b/datafusion-examples/examples/query_planning/planner_api.rs similarity index 98% rename from datafusion-examples/examples/planner_api.rs rename to datafusion-examples/examples/query_planning/planner_api.rs index 55aec7b0108a4..9b8aa1c2fe649 100644 --- a/datafusion-examples/examples/planner_api.rs +++ b/datafusion-examples/examples/query_planning/planner_api.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::error::Result; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::displayable; @@ -32,8 +34,7 @@ use datafusion::prelude::*; /// physical plan: /// - Via the combined `create_physical_plan` API. /// - Utilizing the analyzer, optimizer, and query planner APIs separately. -#[tokio::main] -async fn main() -> Result<()> { +pub async fn planner_api() -> Result<()> { // Set up a DataFusion context and load a Parquet file let ctx = SessionContext::new(); let testdata = datafusion::test_util::parquet_test_data(); diff --git a/datafusion-examples/examples/pruning.rs b/datafusion-examples/examples/query_planning/pruning.rs similarity index 97% rename from datafusion-examples/examples/pruning.rs rename to datafusion-examples/examples/query_planning/pruning.rs index 9a61789662cdd..33f3f8428a77f 100644 --- a/datafusion-examples/examples/pruning.rs +++ b/datafusion-examples/examples/query_planning/pruning.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::collections::HashSet; use std::sync::Arc; @@ -22,6 +24,7 @@ use arrow::array::{ArrayRef, BooleanArray, Int32Array}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::common::pruning::PruningStatistics; use datafusion::common::{DFSchema, ScalarValue}; +use datafusion::error::Result; use datafusion::execution::context::ExecutionProps; use datafusion::physical_expr::create_physical_expr; use datafusion::physical_optimizer::pruning::PruningPredicate; @@ -40,8 +43,7 @@ use datafusion::prelude::*; /// one might do as part of a higher level storage engine. See /// `parquet_index.rs` for an example that uses pruning in the context of an /// individual query. 
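Stepping back to the `plan_to_sql` example renamed above, its round trips reduce to two unparser entry points, `expr_to_sql` for expressions and `plan_to_sql` for whole plans. A compact sketch; the table `t` and its columns are ad hoc:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion::sql::unparser::{expr_to_sql, plan_to_sql};

#[tokio::main]
async fn main() -> Result<()> {
    // Expr -> SQL text
    let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
    println!("{}", expr_to_sql(&expr)?);

    // LogicalPlan -> SQL text: build a plan with the DataFrame API, then unparse it
    let ctx = SessionContext::new();
    ctx.sql("CREATE TABLE t AS VALUES (1, 2), (3, 4)")
        .await?
        .collect()
        .await?;
    let plan = ctx
        .table("t")
        .await?
        .filter(col("column1").gt(lit(1)))?
        .into_unoptimized_plan();
    println!("{}", plan_to_sql(&plan)?);
    Ok(())
}
```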
-#[tokio::main] -async fn main() { +pub async fn pruning() -> Result<()> { // In this example, we'll use the PruningPredicate to determine if // the expression `x = 5 AND y = 10` can never be true based on statistics @@ -69,7 +71,7 @@ async fn main() { let predicate = create_pruning_predicate(expr, &my_catalog.schema); // Evaluate the predicate for the three files in the catalog - let prune_results = predicate.prune(&my_catalog).unwrap(); + let prune_results = predicate.prune(&my_catalog)?; println!("Pruning results: {prune_results:?}"); // The result is a `Vec` of bool values, one for each file in the catalog @@ -93,6 +95,8 @@ async fn main() { false ] ); + + Ok(()) } /// A simple model catalog that has information about the three files that store @@ -186,6 +190,7 @@ impl PruningStatistics for MyCatalog { } } +#[expect(clippy::needless_pass_by_value)] fn create_pruning_predicate(expr: Expr, schema: &SchemaRef) -> PruningPredicate { let df_schema = DFSchema::try_from(Arc::clone(schema)).unwrap(); let props = ExecutionProps::new(); diff --git a/datafusion-examples/examples/thread_pools.rs b/datafusion-examples/examples/query_planning/thread_pools.rs similarity index 99% rename from datafusion-examples/examples/thread_pools.rs rename to datafusion-examples/examples/query_planning/thread_pools.rs index 9842cccfbfe83..6fc7d51e91c1f 100644 --- a/datafusion-examples/examples/thread_pools.rs +++ b/datafusion-examples/examples/query_planning/thread_pools.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example shows how to use separate thread pools (tokio [`Runtime`]))s to //! run the IO and CPU intensive parts of DataFusion plans. //! @@ -64,8 +66,7 @@ use url::Url; /// when using Rust libraries such as `tonic`. Using a separate `Runtime` for /// CPU bound tasks will often be simpler in larger applications, even though it /// makes this example slightly more complex. -#[tokio::main] -async fn main() -> Result<()> { +pub async fn thread_pools() -> Result<()> { // The first two examples read local files. Enabling the URL table feature // lets us treat filenames as tables in SQL. let ctx = SessionContext::new().enable_url_table(); @@ -121,7 +122,7 @@ async fn same_runtime(ctx: &SessionContext, sql: &str) -> Result<()> { // Executing the plan using this pattern intermixes any IO and CPU intensive // work on same Runtime while let Some(batch) = stream.next().await { - println!("{}", pretty_format_batches(&[batch?]).unwrap()); + println!("{}", pretty_format_batches(&[batch?])?); } Ok(()) } diff --git a/datafusion-examples/examples/relation_planner/main.rs b/datafusion-examples/examples/relation_planner/main.rs new file mode 100644 index 0000000000000..d2ba2202d1787 --- /dev/null +++ b/datafusion-examples/examples/relation_planner/main.rs @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Relation Planner Examples +//! +//! These examples demonstrate how to use custom relation planners to extend +//! DataFusion's SQL syntax with custom table operators. +//! +//! ## Usage +//! ```bash +//! cargo run --example relation_planner -- [all|match_recognize|pivot_unpivot|table_sample] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module +//! - `match_recognize` — MATCH_RECOGNIZE pattern matching on event streams +//! - `pivot_unpivot` — PIVOT and UNPIVOT operations for reshaping data +//! - `table_sample` — TABLESAMPLE clause for sampling rows from tables +//! +//! ## Snapshot Testing +//! +//! These examples use [insta](https://insta.rs) for inline snapshot assertions. +//! If query output changes, regenerate the snapshots with: +//! ```bash +//! cargo insta test --example relation_planner --accept +//! ``` + +mod match_recognize; +mod pivot_unpivot; +mod table_sample; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + MatchRecognize, + PivotUnpivot, + TableSample, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "relation_planner"; + + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::MatchRecognize => match_recognize::match_recognize().await?, + ExampleKind::PivotUnpivot => pivot_unpivot::pivot_unpivot().await?, + ExampleKind::TableSample => table_sample::table_sample().await?, + } + + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} + +/// Test wrappers that enable `cargo insta test --example relation_planner --accept` +/// to regenerate inline snapshots. Without these, insta cannot run the examples +/// in test mode since they only have `main()` functions. 
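For readers unfamiliar with the snapshot workflow these wrappers enable, this is roughly how insta's inline snapshots behave (a generic sketch, not tied to these examples): the literal after `@` lives in the source file itself and is rewritten in place by `cargo insta test ... --accept`.

```rust
#[cfg(test)]
mod snapshot_demo {
    use insta::assert_snapshot;

    #[test]
    fn inline_snapshot() {
        let rendered = "Projection: t.a";
        // If `rendered` ever changes, `cargo insta test --accept` updates the
        // string after `@` to the new value instead of failing repeatedly.
        assert_snapshot!(rendered, @"Projection: t.a");
    }
}
```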
+#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_match_recognize() { + match_recognize::match_recognize().await.unwrap(); + } + + #[tokio::test] + async fn test_pivot_unpivot() { + pivot_unpivot::pivot_unpivot().await.unwrap(); + } + + #[tokio::test] + async fn test_table_sample() { + table_sample::table_sample().await.unwrap(); + } +} diff --git a/datafusion-examples/examples/relation_planner/match_recognize.rs b/datafusion-examples/examples/relation_planner/match_recognize.rs new file mode 100644 index 0000000000000..60baf9bd61a62 --- /dev/null +++ b/datafusion-examples/examples/relation_planner/match_recognize.rs @@ -0,0 +1,406 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # MATCH_RECOGNIZE Example +//! +//! This example demonstrates implementing SQL `MATCH_RECOGNIZE` pattern matching +//! using a custom [`RelationPlanner`]. Unlike the [`pivot_unpivot`] example that +//! rewrites SQL to standard operations, this example creates a **custom logical +//! plan node** (`MiniMatchRecognizeNode`) to represent the operation. +//! +//! ## Supported Syntax +//! +//! ```sql +//! SELECT * FROM events +//! MATCH_RECOGNIZE ( +//! PARTITION BY region +//! MEASURES SUM(price) AS total, AVG(price) AS average +//! PATTERN (A B+ C) +//! DEFINE +//! A AS price < 100, +//! B AS price BETWEEN 100 AND 200, +//! C AS price > 200 +//! ) AS matches +//! ``` +//! +//! ## Architecture +//! +//! This example demonstrates **logical planning only**. Physical execution would +//! require implementing an [`ExecutionPlan`] (see the [`table_sample`] example +//! for a complete implementation with physical planning). +//! +//! ```text +//! SQL Query +//! │ +//! ▼ +//! ┌─────────────────────────────────────┐ +//! │ MatchRecognizePlanner │ +//! │ (RelationPlanner trait) │ +//! │ │ +//! │ • Parses MATCH_RECOGNIZE syntax │ +//! │ • Creates MiniMatchRecognizeNode │ +//! │ • Converts SQL exprs to DataFusion │ +//! └─────────────────────────────────────┘ +//! │ +//! ▼ +//! ┌─────────────────────────────────────┐ +//! │ MiniMatchRecognizeNode │ +//! │ (UserDefinedLogicalNode) │ +//! │ │ +//! │ • measures: [(alias, expr), ...] │ +//! │ • definitions: [(symbol, expr), ...]│ +//! └─────────────────────────────────────┘ +//! ``` +//! +//! [`pivot_unpivot`]: super::pivot_unpivot +//! [`table_sample`]: super::table_sample +//! 
[`ExecutionPlan`]: datafusion::physical_plan::ExecutionPlan + +use std::{any::Any, cmp::Ordering, hash::Hasher, sync::Arc}; + +use arrow::array::{ArrayRef, Float64Array, Int32Array, StringArray}; +use arrow::record_batch::RecordBatch; +use datafusion::prelude::*; +use datafusion_common::{DFSchemaRef, Result}; +use datafusion_expr::{ + Expr, UserDefinedLogicalNode, + logical_plan::{Extension, InvariantLevel, LogicalPlan}, + planner::{ + PlannedRelation, RelationPlanner, RelationPlannerContext, RelationPlanning, + }, +}; +use datafusion_sql::sqlparser::ast::TableFactor; +use insta::assert_snapshot; + +// ============================================================================ +// Example Entry Point +// ============================================================================ + +/// Runs the MATCH_RECOGNIZE examples demonstrating pattern matching on event streams. +/// +/// Note: This example demonstrates **logical planning only**. Physical execution +/// would require additional implementation of an [`ExecutionPlan`]. +pub async fn match_recognize() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_relation_planner(Arc::new(MatchRecognizePlanner))?; + register_sample_data(&ctx)?; + + println!("MATCH_RECOGNIZE Example (Logical Planning Only)"); + println!("================================================\n"); + + run_examples(&ctx).await +} + +async fn run_examples(ctx: &SessionContext) -> Result<()> { + // Example 1: Basic MATCH_RECOGNIZE with MEASURES and DEFINE + // Demonstrates: Aggregate measures over matched rows + let plan = run_example( + ctx, + "Example 1: MATCH_RECOGNIZE with aggregations", + r#"SELECT * FROM events + MATCH_RECOGNIZE ( + PARTITION BY 1 + MEASURES SUM(price) AS total_price, AVG(price) AS avg_price + PATTERN (A) + DEFINE A AS price > 10 + ) AS matches"#, + ) + .await?; + assert_snapshot!(plan, @r" + Projection: matches.price + SubqueryAlias: matches + MiniMatchRecognize measures=[total_price := sum(events.price), avg_price := avg(events.price)] define=[a := events.price > Int64(10)] + TableScan: events + "); + + // Example 2: Stock price pattern detection + // Demonstrates: Real-world use case finding prices above threshold + let plan = run_example( + ctx, + "Example 2: Detect high stock prices", + r#"SELECT * FROM stock_prices + MATCH_RECOGNIZE ( + MEASURES + MIN(price) AS min_price, + MAX(price) AS max_price, + AVG(price) AS avg_price + PATTERN (HIGH) + DEFINE HIGH AS price > 151.0 + ) AS trends"#, + ) + .await?; + assert_snapshot!(plan, @r" + Projection: trends.symbol, trends.price + SubqueryAlias: trends + MiniMatchRecognize measures=[min_price := min(stock_prices.price), max_price := max(stock_prices.price), avg_price := avg(stock_prices.price)] define=[high := stock_prices.price > Float64(151)] + TableScan: stock_prices + "); + + Ok(()) +} + +/// Helper to run a single example query and display the logical plan. +async fn run_example(ctx: &SessionContext, title: &str, sql: &str) -> Result { + println!("{title}:\n{sql}\n"); + let plan = ctx.sql(sql).await?.into_unoptimized_plan(); + let plan_str = plan.display_indent().to_string(); + println!("{plan_str}\n"); + Ok(plan_str) +} + +/// Register test data tables. 
+fn register_sample_data(ctx: &SessionContext) -> Result<()> { + // events: simple price series + ctx.register_batch( + "events", + RecordBatch::try_from_iter(vec![( + "price", + Arc::new(Int32Array::from(vec![5, 12, 8, 15, 20])) as ArrayRef, + )])?, + )?; + + // stock_prices: realistic stock data + ctx.register_batch( + "stock_prices", + RecordBatch::try_from_iter(vec![ + ( + "symbol", + Arc::new(StringArray::from(vec!["DDOG", "DDOG", "DDOG", "DDOG"])) + as ArrayRef, + ), + ( + "price", + Arc::new(Float64Array::from(vec![150.0, 155.0, 152.0, 158.0])), + ), + ])?, + )?; + + Ok(()) +} + +// ============================================================================ +// Logical Plan Node: MiniMatchRecognizeNode +// ============================================================================ + +/// A custom logical plan node representing MATCH_RECOGNIZE operations. +/// +/// This is a simplified implementation that captures the essential structure: +/// - `measures`: Aggregate expressions computed over matched rows +/// - `definitions`: Symbol definitions (predicate expressions) +/// +/// A production implementation would also include: +/// - Pattern specification (regex-like pattern) +/// - Partition and order by clauses +/// - Output mode (ONE ROW PER MATCH, ALL ROWS PER MATCH) +/// - After match skip strategy +#[derive(Debug)] +struct MiniMatchRecognizeNode { + input: Arc, + schema: DFSchemaRef, + /// Measures: (alias, aggregate_expr) + measures: Vec<(String, Expr)>, + /// Symbol definitions: (symbol_name, predicate_expr) + definitions: Vec<(String, Expr)>, +} + +impl UserDefinedLogicalNode for MiniMatchRecognizeNode { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "MiniMatchRecognize" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn check_invariants(&self, _check: InvariantLevel) -> Result<()> { + Ok(()) + } + + fn expressions(&self) -> Vec { + self.measures + .iter() + .chain(&self.definitions) + .map(|(_, expr)| expr.clone()) + .collect() + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "MiniMatchRecognize")?; + + if !self.measures.is_empty() { + write!(f, " measures=[")?; + for (i, (alias, expr)) in self.measures.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{alias} := {expr}")?; + } + write!(f, "]")?; + } + + if !self.definitions.is_empty() { + write!(f, " define=[")?; + for (i, (symbol, expr)) in self.definitions.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{symbol} := {expr}")?; + } + write!(f, "]")?; + } + + Ok(()) + } + + fn with_exprs_and_inputs( + &self, + exprs: Vec, + inputs: Vec, + ) -> Result> { + let expected_len = self.measures.len() + self.definitions.len(); + if exprs.len() != expected_len { + return Err(datafusion_common::plan_datafusion_err!( + "MiniMatchRecognize: expected {expected_len} expressions, got {}", + exprs.len() + )); + } + + let input = inputs.into_iter().next().ok_or_else(|| { + datafusion_common::plan_datafusion_err!( + "MiniMatchRecognize requires exactly one input" + ) + })?; + + let (measure_exprs, definition_exprs) = exprs.split_at(self.measures.len()); + + let measures = self + .measures + .iter() + .zip(measure_exprs) + .map(|((alias, _), expr)| (alias.clone(), expr.clone())) + .collect(); + + let definitions = self + .definitions + .iter() + .zip(definition_exprs) + .map(|((symbol, _), expr)| (symbol.clone(), expr.clone())) + 
.collect(); + + Ok(Arc::new(Self { + input: Arc::new(input), + schema: Arc::clone(&self.schema), + measures, + definitions, + })) + } + + fn dyn_hash(&self, state: &mut dyn Hasher) { + state.write_usize(Arc::as_ptr(&self.input) as usize); + state.write_usize(self.measures.len()); + state.write_usize(self.definitions.len()); + } + + fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool { + other.as_any().downcast_ref::().is_some_and(|o| { + Arc::ptr_eq(&self.input, &o.input) + && self.measures == o.measures + && self.definitions == o.definitions + }) + } + + fn dyn_ord(&self, other: &dyn UserDefinedLogicalNode) -> Option { + if self.dyn_eq(other) { + Some(Ordering::Equal) + } else { + None + } + } +} + +// ============================================================================ +// Relation Planner: MatchRecognizePlanner +// ============================================================================ + +/// Relation planner that creates `MiniMatchRecognizeNode` for MATCH_RECOGNIZE queries. +#[derive(Debug)] +struct MatchRecognizePlanner; + +impl RelationPlanner for MatchRecognizePlanner { + fn plan_relation( + &self, + relation: TableFactor, + ctx: &mut dyn RelationPlannerContext, + ) -> Result { + let TableFactor::MatchRecognize { + table, + measures, + symbols, + alias, + .. + } = relation + else { + return Ok(RelationPlanning::Original(relation)); + }; + + // Plan the input table + let input = ctx.plan(*table)?; + let schema = input.schema().clone(); + + // Convert MEASURES: SQL expressions → DataFusion expressions + let planned_measures: Vec<(String, Expr)> = measures + .iter() + .map(|m| { + let alias = ctx.normalize_ident(m.alias.clone()); + let expr = ctx.sql_to_expr(m.expr.clone(), schema.as_ref())?; + Ok((alias, expr)) + }) + .collect::>()?; + + // Convert DEFINE: symbol definitions → DataFusion expressions + let planned_definitions: Vec<(String, Expr)> = symbols + .iter() + .map(|s| { + let name = ctx.normalize_ident(s.symbol.clone()); + let expr = ctx.sql_to_expr(s.definition.clone(), schema.as_ref())?; + Ok((name, expr)) + }) + .collect::>()?; + + // Create the custom node + let node = MiniMatchRecognizeNode { + input: Arc::new(input), + schema, + measures: planned_measures, + definitions: planned_definitions, + }; + + let plan = LogicalPlan::Extension(Extension { + node: Arc::new(node), + }); + + Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) + } +} diff --git a/datafusion-examples/examples/relation_planner/pivot_unpivot.rs b/datafusion-examples/examples/relation_planner/pivot_unpivot.rs new file mode 100644 index 0000000000000..86a6cb955500e --- /dev/null +++ b/datafusion-examples/examples/relation_planner/pivot_unpivot.rs @@ -0,0 +1,567 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
# PIVOT and UNPIVOT Example +//! +//! This example demonstrates implementing SQL `PIVOT` and `UNPIVOT` operations +//! using a custom [`RelationPlanner`]. Unlike the other examples that create +//! custom logical/physical nodes, this example shows how to **rewrite** SQL +//! constructs into equivalent standard SQL operations: +//! +//! ## Supported Syntax +//! +//! ```sql +//! -- PIVOT: Transform rows into columns +//! SELECT * FROM sales +//! PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4')) +//! +//! -- UNPIVOT: Transform columns into rows +//! SELECT * FROM wide_table +//! UNPIVOT (value FOR name IN (col1, col2, col3)) +//! ``` +//! +//! ## Rewrite Strategy +//! +//! **PIVOT** is rewritten to `GROUP BY` with `CASE` expressions: +//! ```sql +//! -- Original: +//! SELECT * FROM sales PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2')) +//! +//! -- Rewritten to: +//! SELECT region, +//! SUM(CASE quarter WHEN 'Q1' THEN amount END) AS Q1, +//! SUM(CASE quarter WHEN 'Q2' THEN amount END) AS Q2 +//! FROM sales +//! GROUP BY region +//! ``` +//! +//! **UNPIVOT** is rewritten to `UNION ALL` of projections: +//! ```sql +//! -- Original: +//! SELECT * FROM wide UNPIVOT (sales FOR quarter IN (q1, q2)) +//! +//! -- Rewritten to: +//! SELECT region, 'q1' AS quarter, q1 AS sales FROM wide +//! UNION ALL +//! SELECT region, 'q2' AS quarter, q2 AS sales FROM wide +//! ``` + +use std::sync::Arc; + +use arrow::array::{ArrayRef, Int64Array, StringArray}; +use arrow::record_batch::RecordBatch; +use datafusion::prelude::*; +use datafusion_common::{Result, ScalarValue, plan_datafusion_err}; +use datafusion_expr::{ + Expr, case, col, lit, + logical_plan::builder::LogicalPlanBuilder, + planner::{ + PlannedRelation, RelationPlanner, RelationPlannerContext, RelationPlanning, + }, +}; +use datafusion_sql::sqlparser::ast::{NullInclusion, PivotValueSource, TableFactor}; +use insta::assert_snapshot; + +// ============================================================================ +// Example Entry Point +// ============================================================================ + +/// Runs the PIVOT/UNPIVOT examples demonstrating data reshaping operations. 
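///
/// A minimal wiring sketch, mirroring the setup performed below (the
/// `quarterly_sales` table is the one created in `register_sample_data`):
///
/// ```rust,ignore
/// use std::sync::Arc;
/// use datafusion::prelude::*;
/// use datafusion_common::Result;
///
/// async fn demo() -> Result<()> {
///     let ctx = SessionContext::new();
///     // The planner must be registered before the SQL is planned.
///     ctx.register_relation_planner(Arc::new(PivotUnpivotPlanner))?;
///     register_sample_data(&ctx)?;
///     let df = ctx
///         .sql(
///             "SELECT * FROM quarterly_sales \
///              PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2')) AS p",
///         )
///         .await?;
///     df.show().await?;
///     Ok(())
/// }
/// ```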
+pub async fn pivot_unpivot() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_relation_planner(Arc::new(PivotUnpivotPlanner))?; + register_sample_data(&ctx)?; + + println!("PIVOT and UNPIVOT Example"); + println!("=========================\n"); + + run_examples(&ctx).await +} + +async fn run_examples(ctx: &SessionContext) -> Result<()> { + // ----- PIVOT Examples ----- + + // Example 1: Basic PIVOT + // Transforms: (region, quarter, amount) → (region, Q1, Q2) + let results = run_example( + ctx, + "Example 1: Basic PIVOT", + r#"SELECT * FROM quarterly_sales + PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2')) AS p + ORDER BY region"#, + ) + .await?; + assert_snapshot!(results, @r" + +--------+------+------+ + | region | Q1 | Q2 | + +--------+------+------+ + | North | 1000 | 1500 | + | South | 1200 | 1300 | + +--------+------+------+ + "); + + // Example 2: PIVOT with multiple aggregates + // Creates columns for each (aggregate, value) combination + let results = run_example( + ctx, + "Example 2: PIVOT with multiple aggregates", + r#"SELECT * FROM quarterly_sales + PIVOT (SUM(amount), AVG(amount) FOR quarter IN ('Q1', 'Q2')) AS p + ORDER BY region"#, + ) + .await?; + assert_snapshot!(results, @r" + +--------+--------+--------+--------+--------+ + | region | sum_Q1 | sum_Q2 | avg_Q1 | avg_Q2 | + +--------+--------+--------+--------+--------+ + | North | 1000 | 1500 | 1000.0 | 1500.0 | + | South | 1200 | 1300 | 1200.0 | 1300.0 | + +--------+--------+--------+--------+--------+ + "); + + // Example 3: PIVOT with multiple grouping columns + // Non-pivot, non-aggregate columns become GROUP BY columns + let results = run_example( + ctx, + "Example 3: PIVOT with multiple grouping columns", + r#"SELECT * FROM product_sales + PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2')) AS p + ORDER BY region, product"#, + ) + .await?; + assert_snapshot!(results, @r" + +--------+----------+-----+-----+ + | region | product | Q1 | Q2 | + +--------+----------+-----+-----+ + | North | ProductA | 500 | | + | North | ProductB | 500 | | + | South | ProductA | | 650 | + +--------+----------+-----+-----+ + "); + + // ----- UNPIVOT Examples ----- + + // Example 4: Basic UNPIVOT + // Transforms: (region, q1, q2) → (region, quarter, sales) + let results = run_example( + ctx, + "Example 4: Basic UNPIVOT", + r#"SELECT * FROM wide_sales + UNPIVOT (sales FOR quarter IN (q1 AS 'Q1', q2 AS 'Q2')) AS u + ORDER BY quarter, region"#, + ) + .await?; + assert_snapshot!(results, @r" + +--------+---------+-------+ + | region | quarter | sales | + +--------+---------+-------+ + | North | Q1 | 1000 | + | South | Q1 | 1200 | + | North | Q2 | 1500 | + | South | Q2 | 1300 | + +--------+---------+-------+ + "); + + // Example 5: UNPIVOT with INCLUDE NULLS + // By default, UNPIVOT excludes rows where the value column is NULL. + // INCLUDE NULLS keeps them (same result here since no NULLs in data). 
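    // Under the rewrite used by this example, EXCLUDE NULLS (the default) is
    // implemented as an `IS NOT NULL` filter on the value column inside
    // `plan_unpivot`; INCLUDE NULLS simply skips that filter.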
+ let results = run_example( + ctx, + "Example 5: UNPIVOT INCLUDE NULLS", + r#"SELECT * FROM wide_sales + UNPIVOT INCLUDE NULLS (sales FOR quarter IN (q1 AS 'Q1', q2 AS 'Q2')) AS u + ORDER BY quarter, region"#, + ) + .await?; + assert_snapshot!(results, @r" + +--------+---------+-------+ + | region | quarter | sales | + +--------+---------+-------+ + | North | Q1 | 1000 | + | South | Q1 | 1200 | + | North | Q2 | 1500 | + | South | Q2 | 1300 | + +--------+---------+-------+ + "); + + // Example 6: PIVOT with column projection + // Standard SQL operations work seamlessly after PIVOT + let results = run_example( + ctx, + "Example 6: PIVOT with projection", + r#"SELECT region FROM quarterly_sales + PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2')) AS p + ORDER BY region"#, + ) + .await?; + assert_snapshot!(results, @r" + +--------+ + | region | + +--------+ + | North | + | South | + +--------+ + "); + + Ok(()) +} + +/// Helper to run a single example query and capture results. +async fn run_example(ctx: &SessionContext, title: &str, sql: &str) -> Result { + println!("{title}:\n{sql}\n"); + let df = ctx.sql(sql).await?; + println!("{}\n", df.logical_plan().display_indent()); + + let batches = df.collect().await?; + let results = arrow::util::pretty::pretty_format_batches(&batches)?.to_string(); + println!("{results}\n"); + + Ok(results) +} + +/// Register test data tables. +fn register_sample_data(ctx: &SessionContext) -> Result<()> { + // quarterly_sales: normalized sales data (region, quarter, amount) + ctx.register_batch( + "quarterly_sales", + RecordBatch::try_from_iter(vec![ + ( + "region", + Arc::new(StringArray::from(vec!["North", "North", "South", "South"])) + as ArrayRef, + ), + ( + "quarter", + Arc::new(StringArray::from(vec!["Q1", "Q2", "Q1", "Q2"])), + ), + ( + "amount", + Arc::new(Int64Array::from(vec![1000, 1500, 1200, 1300])), + ), + ])?, + )?; + + // product_sales: sales with additional grouping dimension + ctx.register_batch( + "product_sales", + RecordBatch::try_from_iter(vec![ + ( + "region", + Arc::new(StringArray::from(vec!["North", "North", "South"])) as ArrayRef, + ), + ( + "quarter", + Arc::new(StringArray::from(vec!["Q1", "Q1", "Q2"])), + ), + ( + "product", + Arc::new(StringArray::from(vec!["ProductA", "ProductB", "ProductA"])), + ), + ("amount", Arc::new(Int64Array::from(vec![500, 500, 650]))), + ])?, + )?; + + // wide_sales: denormalized/wide format (for UNPIVOT) + ctx.register_batch( + "wide_sales", + RecordBatch::try_from_iter(vec![ + ( + "region", + Arc::new(StringArray::from(vec!["North", "South"])) as ArrayRef, + ), + ("q1", Arc::new(Int64Array::from(vec![1000, 1200]))), + ("q2", Arc::new(Int64Array::from(vec![1500, 1300]))), + ])?, + )?; + + Ok(()) +} + +// ============================================================================ +// Relation Planner: PivotUnpivotPlanner +// ============================================================================ + +/// Relation planner that rewrites PIVOT and UNPIVOT into standard SQL. +#[derive(Debug)] +struct PivotUnpivotPlanner; + +impl RelationPlanner for PivotUnpivotPlanner { + fn plan_relation( + &self, + relation: TableFactor, + ctx: &mut dyn RelationPlannerContext, + ) -> Result { + match relation { + TableFactor::Pivot { + table, + aggregate_functions, + value_column, + value_source, + alias, + .. 
+ } => plan_pivot( + ctx, + *table, + &aggregate_functions, + &value_column, + value_source, + alias, + ), + + TableFactor::Unpivot { + table, + value, + name, + columns, + null_inclusion, + alias, + } => plan_unpivot( + ctx, + *table, + &value, + name, + &columns, + null_inclusion.as_ref(), + alias, + ), + + other => Ok(RelationPlanning::Original(other)), + } + } +} + +// ============================================================================ +// PIVOT Implementation +// ============================================================================ + +/// Rewrite PIVOT to GROUP BY with CASE expressions. +fn plan_pivot( + ctx: &mut dyn RelationPlannerContext, + table: TableFactor, + aggregate_functions: &[datafusion_sql::sqlparser::ast::ExprWithAlias], + value_column: &[datafusion_sql::sqlparser::ast::Expr], + value_source: PivotValueSource, + alias: Option, +) -> Result { + // Plan the input table + let input = ctx.plan(table)?; + let schema = input.schema(); + + // Parse aggregate functions + let aggregates: Vec = aggregate_functions + .iter() + .map(|agg| ctx.sql_to_expr(agg.expr.clone(), schema.as_ref())) + .collect::>()?; + + // Get the pivot column (only single-column pivot supported) + if value_column.len() != 1 { + return Err(plan_datafusion_err!( + "Only single-column PIVOT is supported" + )); + } + let pivot_col = ctx.sql_to_expr(value_column[0].clone(), schema.as_ref())?; + let pivot_col_name = extract_column_name(&pivot_col)?; + + // Parse pivot values + let pivot_values = match value_source { + PivotValueSource::List(list) => list + .iter() + .map(|item| { + let alias = item + .alias + .as_ref() + .map(|id| ctx.normalize_ident(id.clone())); + let expr = ctx.sql_to_expr(item.expr.clone(), schema.as_ref())?; + Ok((alias, expr)) + }) + .collect::>>()?, + _ => { + return Err(plan_datafusion_err!( + "Dynamic PIVOT (ANY/Subquery) is not supported" + )); + } + }; + + // Determine GROUP BY columns (non-pivot, non-aggregate columns) + let agg_input_cols: Vec<&str> = aggregates + .iter() + .filter_map(|agg| { + if let Expr::AggregateFunction(f) = agg { + f.params.args.first().and_then(|e| { + if let Expr::Column(c) = e { + Some(c.name.as_str()) + } else { + None + } + }) + } else { + None + } + }) + .collect(); + + let group_by_cols: Vec = schema + .fields() + .iter() + .map(|f| f.name().as_str()) + .filter(|name| *name != pivot_col_name.as_str() && !agg_input_cols.contains(name)) + .map(col) + .collect(); + + // Build CASE expressions for each (aggregate, pivot_value) pair + let mut pivot_exprs = Vec::new(); + for agg in &aggregates { + let Expr::AggregateFunction(agg_fn) = agg else { + continue; + }; + let Some(agg_input) = agg_fn.params.args.first().cloned() else { + continue; + }; + + for (value_alias, pivot_value) in &pivot_values { + // CASE pivot_col WHEN pivot_value THEN agg_input END + let case_expr = case(col(&pivot_col_name)) + .when(pivot_value.clone(), agg_input.clone()) + .end()?; + + // Wrap in aggregate function + let pivoted = agg_fn.func.call(vec![case_expr]); + + // Determine column alias + let value_str = value_alias + .clone() + .unwrap_or_else(|| expr_to_string(pivot_value)); + let col_alias = if aggregates.len() > 1 { + format!("{}_{}", agg_fn.func.name(), value_str) + } else { + value_str + }; + + pivot_exprs.push(pivoted.alias(col_alias)); + } + } + + let plan = LogicalPlanBuilder::from(input) + .aggregate(group_by_cols, pivot_exprs)? 
        .build()?;
+
+    Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias)))
+}
+
+// ============================================================================
+// UNPIVOT Implementation
+// ============================================================================
+
+/// Rewrite UNPIVOT to UNION ALL of projections.
+fn plan_unpivot(
+    ctx: &mut dyn RelationPlannerContext,
+    table: TableFactor,
+    value: &datafusion_sql::sqlparser::ast::Expr,
+    name: datafusion_sql::sqlparser::ast::Ident,
+    columns: &[datafusion_sql::sqlparser::ast::ExprWithAlias],
+    null_inclusion: Option<&NullInclusion>,
+    alias: Option<datafusion_sql::sqlparser::ast::TableAlias>,
+) -> Result<RelationPlanning> {
+    // Plan the input table
+    let input = ctx.plan(table)?;
+    let schema = input.schema();
+
+    // Output column names
+    let value_col_name = value.to_string();
+    let name_col_name = ctx.normalize_ident(name);
+
+    // Parse columns to unpivot: (source_column, label)
+    let unpivot_cols: Vec<(String, String)> = columns
+        .iter()
+        .map(|c| {
+            let label = c
+                .alias
+                .as_ref()
+                .map(|id| ctx.normalize_ident(id.clone()))
+                .unwrap_or_else(|| c.expr.to_string());
+            let expr = ctx.sql_to_expr(c.expr.clone(), schema.as_ref())?;
+            let col_name = extract_column_name(&expr)?;
+            Ok((col_name.to_string(), label))
+        })
+        .collect::<Result<_>>()?;
+
+    // Columns to preserve (not being unpivoted)
+    let keep_cols: Vec<&str> = schema
+        .fields()
+        .iter()
+        .map(|f| f.name().as_str())
+        .filter(|name| !unpivot_cols.iter().any(|(c, _)| c == *name))
+        .collect();
+
+    // Build UNION ALL: one SELECT per unpivot column
+    if unpivot_cols.is_empty() {
+        return Err(plan_datafusion_err!("UNPIVOT requires at least one column"));
+    }
+
+    let mut union_inputs: Vec<_> = unpivot_cols
+        .iter()
+        .map(|(col_name, label)| {
+            let mut projection: Vec<Expr> = keep_cols.iter().map(|c| col(*c)).collect();
+            projection.push(lit(label.clone()).alias(&name_col_name));
+            projection.push(col(col_name).alias(&value_col_name));
+
+            LogicalPlanBuilder::from(input.clone())
+                .project(projection)?
+                .build()
+        })
+        .collect::<Result<_>>()?;
+
+    // Combine with UNION ALL
+    let mut plan = union_inputs.remove(0);
+    for branch in union_inputs {
+        plan = LogicalPlanBuilder::from(plan).union(branch)?.build()?;
+    }
+
+    // Apply EXCLUDE NULLS filter (default behavior)
+    let exclude_nulls = null_inclusion.is_none()
+        || matches!(null_inclusion, Some(&NullInclusion::ExcludeNulls));
+    if exclude_nulls {
+        plan = LogicalPlanBuilder::from(plan)
+            .filter(col(&value_col_name).is_not_null())?
+            .build()?;
+    }
+
+    Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias)))
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/// Extract column name from an expression.
+fn extract_column_name(expr: &Expr) -> Result<String> {
+    match expr {
+        Expr::Column(c) => Ok(c.name.clone()),
+        _ => Err(plan_datafusion_err!(
+            "Expected column reference, got {expr}"
+        )),
+    }
+}
+
+/// Convert an expression to a string for use as column alias.
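///
/// A small sketch of the intended behavior (`lit` is the literal helper
/// already imported in this file):
///
/// ```rust,ignore
/// use datafusion_expr::lit;
///
/// // A quoted pivot value such as 'Q1' becomes the bare alias "Q1",
/// assert_eq!(expr_to_string(&lit("Q1")), "Q1");
/// // while other literals fall back to their display form.
/// assert_eq!(expr_to_string(&lit(4_i64)), "4");
/// ```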
+fn expr_to_string(expr: &Expr) -> String { + match expr { + Expr::Literal(ScalarValue::Utf8(Some(s)), _) => s.clone(), + Expr::Literal(v, _) => v.to_string(), + other => other.to_string(), + } +} diff --git a/datafusion-examples/examples/relation_planner/table_sample.rs b/datafusion-examples/examples/relation_planner/table_sample.rs new file mode 100644 index 0000000000000..207fffe1327a3 --- /dev/null +++ b/datafusion-examples/examples/relation_planner/table_sample.rs @@ -0,0 +1,845 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # TABLESAMPLE Example +//! +//! This example demonstrates implementing SQL `TABLESAMPLE` support using +//! DataFusion's extensibility APIs. +//! +//! This is a working `TABLESAMPLE` implementation that can serve as a starting +//! point for your own projects. It also works as a template for adding other +//! custom SQL operators, covering the full pipeline from parsing to execution. +//! +//! It shows how to: +//! +//! 1. **Parse** TABLESAMPLE syntax via a custom [`RelationPlanner`] +//! 2. **Plan** sampling as a custom logical node ([`TableSamplePlanNode`]) +//! 3. **Execute** sampling via a custom physical operator ([`SampleExec`]) +//! +//! ## Supported Syntax +//! +//! ```sql +//! -- Bernoulli sampling (each row has N% chance of selection) +//! SELECT * FROM table TABLESAMPLE BERNOULLI(10 PERCENT) +//! +//! -- Fractional sampling (0.0 to 1.0) +//! SELECT * FROM table TABLESAMPLE (0.1) +//! +//! -- Row count limit +//! SELECT * FROM table TABLESAMPLE (100 ROWS) +//! +//! -- Reproducible sampling with a seed +//! SELECT * FROM table TABLESAMPLE (10 PERCENT) REPEATABLE(42) +//! ``` +//! +//! ## Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────────────────────────────┐ +//! │ SQL Query │ +//! │ SELECT * FROM t TABLESAMPLE BERNOULLI(10 PERCENT) REPEATABLE(1)│ +//! └─────────────────────────────────────────────────────────────────┘ +//! │ +//! ▼ +//! ┌─────────────────────────────────────────────────────────────────┐ +//! │ TableSamplePlanner │ +//! │ (RelationPlanner: parses TABLESAMPLE, creates logical node) │ +//! └─────────────────────────────────────────────────────────────────┘ +//! │ +//! ▼ +//! ┌─────────────────────────────────────────────────────────────────┐ +//! │ TableSamplePlanNode │ +//! │ (UserDefinedLogicalNode: stores sampling params) │ +//! └─────────────────────────────────────────────────────────────────┘ +//! │ +//! ▼ +//! ┌─────────────────────────────────────────────────────────────────┐ +//! │ TableSampleExtensionPlanner │ +//! │ (ExtensionPlanner: creates physical execution plan) │ +//! └─────────────────────────────────────────────────────────────────┘ +//! │ +//! ▼ +//! ┌─────────────────────────────────────────────────────────────────┐ +//! 
│ SampleExec │ +//! │ (ExecutionPlan: performs actual row sampling at runtime) │ +//! └─────────────────────────────────────────────────────────────────┘ +//! ``` + +use std::{ + any::Any, + fmt::{self, Debug, Formatter}, + hash::{Hash, Hasher}, + ops::{Add, Div, Mul, Sub}, + pin::Pin, + str::FromStr, + sync::Arc, + task::{Context, Poll}, +}; + +use arrow::{ + array::{ArrayRef, Int32Array, RecordBatch, StringArray, UInt32Array}, + compute, +}; +use arrow_schema::SchemaRef; +use futures::{ + ready, + stream::{Stream, StreamExt}, +}; +use rand::{Rng, SeedableRng, rngs::StdRng}; +use tonic::async_trait; + +use datafusion::{ + execution::{ + RecordBatchStream, SendableRecordBatchStream, SessionState, SessionStateBuilder, + TaskContext, context::QueryPlanner, + }, + physical_expr::EquivalenceProperties, + physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RecordOutput}, + }, + physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}, + prelude::*, +}; +use datafusion_common::{ + DFSchemaRef, DataFusionError, Result, Statistics, internal_err, not_impl_err, + plan_datafusion_err, plan_err, +}; +use datafusion_expr::{ + UserDefinedLogicalNode, UserDefinedLogicalNodeCore, + logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder}, + planner::{ + PlannedRelation, RelationPlanner, RelationPlannerContext, RelationPlanning, + }, +}; +use datafusion_sql::sqlparser::ast::{ + self, TableFactor, TableSampleMethod, TableSampleUnit, +}; +use insta::assert_snapshot; + +// ============================================================================ +// Example Entry Point +// ============================================================================ + +/// Runs the TABLESAMPLE examples demonstrating various sampling techniques. 
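///
/// A minimal wiring sketch of the setup performed below. Both halves are
/// needed: the relation planner produces the logical node, the query planner
/// turns it into `SampleExec`. Assumes `sample_data` is registered as in
/// `register_sample_data`:
///
/// ```rust,ignore
/// use std::sync::Arc;
/// use datafusion::execution::SessionStateBuilder;
/// use datafusion::prelude::*;
/// use datafusion_common::Result;
///
/// async fn demo() -> Result<()> {
///     let state = SessionStateBuilder::new()
///         .with_default_features()
///         // physical planning: TableSamplePlanNode -> SampleExec
///         .with_query_planner(Arc::new(TableSampleQueryPlanner))
///         .build();
///     let ctx = SessionContext::new_with_state(state);
///     // logical planning: TABLESAMPLE syntax -> TableSamplePlanNode
///     ctx.register_relation_planner(Arc::new(TableSamplePlanner))?;
///     register_sample_data(&ctx)?;
///     ctx.sql("SELECT * FROM sample_data TABLESAMPLE (3 ROWS)")
///         .await?
///         .show()
///         .await?;
///     Ok(())
/// }
/// ```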
+pub async fn table_sample() -> Result<()> { + // Build session with custom query planner for physical planning + let state = SessionStateBuilder::new() + .with_default_features() + .with_query_planner(Arc::new(TableSampleQueryPlanner)) + .build(); + + let ctx = SessionContext::new_with_state(state); + + // Register custom relation planner for logical planning + ctx.register_relation_planner(Arc::new(TableSamplePlanner))?; + register_sample_data(&ctx)?; + + println!("TABLESAMPLE Example"); + println!("===================\n"); + + run_examples(&ctx).await +} + +async fn run_examples(ctx: &SessionContext) -> Result<()> { + // Example 1: Baseline - full table scan + let results = run_example( + ctx, + "Example 1: Full table (baseline)", + "SELECT * FROM sample_data", + ) + .await?; + assert_snapshot!(results, @r" + +---------+---------+ + | column1 | column2 | + +---------+---------+ + | 1 | row_1 | + | 2 | row_2 | + | 3 | row_3 | + | 4 | row_4 | + | 5 | row_5 | + | 6 | row_6 | + | 7 | row_7 | + | 8 | row_8 | + | 9 | row_9 | + | 10 | row_10 | + +---------+---------+ + "); + + // Example 2: Percentage-based Bernoulli sampling + // REPEATABLE(seed) ensures deterministic results for snapshot testing + let results = run_example( + ctx, + "Example 2: BERNOULLI percentage sampling", + "SELECT * FROM sample_data TABLESAMPLE BERNOULLI(30 PERCENT) REPEATABLE(123)", + ) + .await?; + assert_snapshot!(results, @r" + +---------+---------+ + | column1 | column2 | + +---------+---------+ + | 1 | row_1 | + | 2 | row_2 | + | 7 | row_7 | + | 8 | row_8 | + +---------+---------+ + "); + + // Example 3: Fractional sampling (0.0 to 1.0) + // REPEATABLE(seed) ensures deterministic results for snapshot testing + let results = run_example( + ctx, + "Example 3: Fractional sampling", + "SELECT * FROM sample_data TABLESAMPLE (0.5) REPEATABLE(456)", + ) + .await?; + assert_snapshot!(results, @r" + +---------+---------+ + | column1 | column2 | + +---------+---------+ + | 2 | row_2 | + | 4 | row_4 | + | 8 | row_8 | + +---------+---------+ + "); + + // Example 4: Row count limit (deterministic, no seed needed) + let results = run_example( + ctx, + "Example 4: Row count limit", + "SELECT * FROM sample_data TABLESAMPLE (3 ROWS)", + ) + .await?; + assert_snapshot!(results, @r" + +---------+---------+ + | column1 | column2 | + +---------+---------+ + | 1 | row_1 | + | 2 | row_2 | + | 3 | row_3 | + +---------+---------+ + "); + + // Example 5: Sampling combined with filtering + let results = run_example( + ctx, + "Example 5: Sampling with WHERE clause", + "SELECT * FROM sample_data TABLESAMPLE (5 ROWS) WHERE column1 > 2", + ) + .await?; + assert_snapshot!(results, @r" + +---------+---------+ + | column1 | column2 | + +---------+---------+ + | 3 | row_3 | + | 4 | row_4 | + | 5 | row_5 | + +---------+---------+ + "); + + // Example 6: Sampling in JOIN queries + // REPEATABLE(seed) ensures deterministic results for snapshot testing + let results = run_example( + ctx, + "Example 6: Sampling in JOINs", + r#"SELECT t1.column1, t2.column1, t1.column2, t2.column2 + FROM sample_data t1 TABLESAMPLE (0.7) REPEATABLE(789) + JOIN sample_data t2 TABLESAMPLE (0.7) REPEATABLE(123) + ON t1.column1 = t2.column1"#, + ) + .await?; + assert_snapshot!(results, @r" + +---------+---------+---------+---------+ + | column1 | column1 | column2 | column2 | + +---------+---------+---------+---------+ + | 2 | 2 | row_2 | row_2 | + | 5 | 5 | row_5 | row_5 | + | 7 | 7 | row_7 | row_7 | + | 8 | 8 | row_8 | row_8 | + | 10 | 10 | row_10 | row_10 | + 
+---------+---------+---------+---------+ + "); + + Ok(()) +} + +/// Helper to run a single example query and capture results. +async fn run_example(ctx: &SessionContext, title: &str, sql: &str) -> Result { + println!("{title}:\n{sql}\n"); + let df = ctx.sql(sql).await?; + println!("{}\n", df.logical_plan().display_indent()); + + let batches = df.collect().await?; + let results = arrow::util::pretty::pretty_format_batches(&batches)?.to_string(); + println!("{results}\n"); + + Ok(results) +} + +/// Register test data: 10 rows with column1=1..10 and column2="row_1".."row_10" +fn register_sample_data(ctx: &SessionContext) -> Result<()> { + let column1: ArrayRef = Arc::new(Int32Array::from((1..=10).collect::>())); + let column2: ArrayRef = Arc::new(StringArray::from( + (1..=10).map(|i| format!("row_{i}")).collect::>(), + )); + let batch = + RecordBatch::try_from_iter(vec![("column1", column1), ("column2", column2)])?; + ctx.register_batch("sample_data", batch)?; + Ok(()) +} + +// ============================================================================ +// Logical Planning: TableSamplePlanner + TableSamplePlanNode +// ============================================================================ + +/// Relation planner that intercepts `TABLESAMPLE` clauses in SQL and creates +/// [`TableSamplePlanNode`] logical nodes. +#[derive(Debug)] +struct TableSamplePlanner; + +impl RelationPlanner for TableSamplePlanner { + fn plan_relation( + &self, + relation: TableFactor, + context: &mut dyn RelationPlannerContext, + ) -> Result { + // Only handle Table relations with TABLESAMPLE clause + let TableFactor::Table { + sample: Some(sample), + alias, + name, + args, + with_hints, + version, + with_ordinality, + partitions, + json_path, + index_hints, + } = relation + else { + return Ok(RelationPlanning::Original(relation)); + }; + + // Extract sample spec (handles both before/after alias positions) + let sample = match sample { + ast::TableSampleKind::BeforeTableAlias(s) + | ast::TableSampleKind::AfterTableAlias(s) => s, + }; + + // Validate sampling method + if let Some(method) = &sample.name + && *method != TableSampleMethod::Bernoulli + && *method != TableSampleMethod::Row + { + return not_impl_err!( + "Sampling method {} is not supported (only BERNOULLI and ROW)", + method + ); + } + + // Offset sampling (ClickHouse-style) not supported + if sample.offset.is_some() { + return not_impl_err!( + "TABLESAMPLE with OFFSET is not supported (requires total row count)" + ); + } + + // Parse optional REPEATABLE seed + let seed = sample + .seed + .map(|s| { + s.value.to_string().parse::().map_err(|_| { + plan_datafusion_err!("REPEATABLE seed must be an integer") + }) + }) + .transpose()?; + + // Plan the underlying table without the sample clause + let base_relation = TableFactor::Table { + sample: None, + alias: alias.clone(), + name, + args, + with_hints, + version, + with_ordinality, + partitions, + json_path, + index_hints, + }; + let input = context.plan(base_relation)?; + + // Handle bucket sampling (Hive-style: TABLESAMPLE(BUCKET x OUT OF y)) + if let Some(bucket) = sample.bucket { + if bucket.on.is_some() { + return not_impl_err!( + "TABLESAMPLE BUCKET with ON clause requires CLUSTERED BY table" + ); + } + let bucket_num: u64 = + bucket.bucket.to_string().parse().map_err(|_| { + plan_datafusion_err!("bucket number must be an integer") + })?; + let total: u64 = + bucket.total.to_string().parse().map_err(|_| { + plan_datafusion_err!("bucket total must be an integer") + })?; + + let fraction = bucket_num 
as f64 / total as f64; + let plan = TableSamplePlanNode::new(input, fraction, seed).into_plan(); + return Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))); + } + + // Handle quantity-based sampling + let Some(quantity) = sample.quantity else { + return plan_err!( + "TABLESAMPLE requires a quantity (percentage, fraction, or row count)" + ); + }; + + match quantity.unit { + // TABLESAMPLE (N ROWS) - exact row limit + Some(TableSampleUnit::Rows) => { + let rows = parse_quantity::(&quantity.value)?; + if rows < 0 { + return plan_err!("row count must be non-negative, got {}", rows); + } + let plan = LogicalPlanBuilder::from(input) + .limit(0, Some(rows as usize))? + .build()?; + Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) + } + + // TABLESAMPLE (N PERCENT) - percentage sampling + Some(TableSampleUnit::Percent) => { + let percent = parse_quantity::(&quantity.value)?; + let fraction = percent / 100.0; + let plan = TableSamplePlanNode::new(input, fraction, seed).into_plan(); + Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) + } + + // TABLESAMPLE (N) - fraction if <1.0, row limit if >=1.0 + None => { + let value = parse_quantity::(&quantity.value)?; + if value < 0.0 { + return plan_err!("sample value must be non-negative, got {}", value); + } + let plan = if value >= 1.0 { + // Interpret as row limit + LogicalPlanBuilder::from(input) + .limit(0, Some(value as usize))? + .build()? + } else { + // Interpret as fraction + TableSamplePlanNode::new(input, value, seed).into_plan() + }; + Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) + } + } + } +} + +/// Parse a SQL expression as a numeric value (supports basic arithmetic). +fn parse_quantity(expr: &ast::Expr) -> Result +where + T: FromStr + Add + Sub + Mul + Div, +{ + eval_numeric_expr(expr) + .ok_or_else(|| plan_datafusion_err!("invalid numeric expression: {:?}", expr)) +} + +/// Recursively evaluate numeric SQL expressions. +fn eval_numeric_expr(expr: &ast::Expr) -> Option +where + T: FromStr + Add + Sub + Mul + Div, +{ + match expr { + ast::Expr::Value(v) => match &v.value { + ast::Value::Number(n, _) => n.to_string().parse().ok(), + _ => None, + }, + ast::Expr::BinaryOp { left, op, right } => { + let l = eval_numeric_expr::(left)?; + let r = eval_numeric_expr::(right)?; + match op { + ast::BinaryOperator::Plus => Some(l + r), + ast::BinaryOperator::Minus => Some(l - r), + ast::BinaryOperator::Multiply => Some(l * r), + ast::BinaryOperator::Divide => Some(l / r), + _ => None, + } + } + _ => None, + } +} + +/// Custom logical plan node representing a TABLESAMPLE operation. +/// +/// Stores sampling parameters (bounds, seed) and wraps the input plan. +/// Gets converted to [`SampleExec`] during physical planning. +#[derive(Debug, Clone, Hash, Eq, PartialEq, PartialOrd)] +struct TableSamplePlanNode { + input: LogicalPlan, + lower_bound: HashableF64, + upper_bound: HashableF64, + seed: u64, +} + +impl TableSamplePlanNode { + /// Create a new sampling node with the given fraction (0.0 to 1.0). + fn new(input: LogicalPlan, fraction: f64, seed: Option) -> Self { + Self { + input, + lower_bound: HashableF64(0.0), + upper_bound: HashableF64(fraction), + seed: seed.unwrap_or_else(rand::random), + } + } + + /// Wrap this node in a LogicalPlan::Extension. 
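///
/// Usage sketch, as in the planner above (the fraction and seed are
/// illustrative values):
///
/// ```rust,ignore
/// use datafusion_expr::logical_plan::LogicalPlan;
///
/// // Wrap an already-planned input in a 10% Bernoulli sample, seeded for
/// // reproducible results.
/// fn sample_ten_percent(input: LogicalPlan) -> LogicalPlan {
///     TableSamplePlanNode::new(input, 0.10, Some(42)).into_plan()
/// }
/// ```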
+ fn into_plan(self) -> LogicalPlan { + LogicalPlan::Extension(Extension { + node: Arc::new(self), + }) + } +} + +impl UserDefinedLogicalNodeCore for TableSamplePlanNode { + fn name(&self) -> &str { + "TableSample" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + self.input.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Sample: bounds=[{}, {}], seed={}", + self.lower_bound.0, self.upper_bound.0, self.seed + ) + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + mut inputs: Vec, + ) -> Result { + Ok(Self { + input: inputs.swap_remove(0), + lower_bound: self.lower_bound, + upper_bound: self.upper_bound, + seed: self.seed, + }) + } +} + +/// Wrapper for f64 that implements Hash and Eq (required for LogicalPlan). +#[derive(Debug, Clone, Copy, PartialOrd)] +struct HashableF64(f64); + +impl PartialEq for HashableF64 { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits() == other.0.to_bits() + } +} + +impl Eq for HashableF64 {} + +impl Hash for HashableF64 { + fn hash(&self, state: &mut H) { + self.0.to_bits().hash(state); + } +} + +// ============================================================================ +// Physical Planning: TableSampleQueryPlanner + TableSampleExtensionPlanner +// ============================================================================ + +/// Custom query planner that registers [`TableSampleExtensionPlanner`] to +/// convert [`TableSamplePlanNode`] into [`SampleExec`]. +#[derive(Debug)] +struct TableSampleQueryPlanner; + +#[async_trait] +impl QueryPlanner for TableSampleQueryPlanner { + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + session_state: &SessionState, + ) -> Result> { + let planner = DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new( + TableSampleExtensionPlanner, + )]); + planner + .create_physical_plan(logical_plan, session_state) + .await + } +} + +/// Extension planner that converts [`TableSamplePlanNode`] to [`SampleExec`]. +struct TableSampleExtensionPlanner; + +#[async_trait] +impl ExtensionPlanner for TableSampleExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> Result>> { + let Some(sample_node) = node.as_any().downcast_ref::() + else { + return Ok(None); + }; + + let exec = SampleExec::try_new( + Arc::clone(&physical_inputs[0]), + sample_node.lower_bound.0, + sample_node.upper_bound.0, + sample_node.seed, + )?; + Ok(Some(Arc::new(exec))) + } +} + +// ============================================================================ +// Physical Execution: SampleExec + BernoulliSampler +// ============================================================================ + +/// Physical execution plan that samples rows from its input using Bernoulli sampling. +/// +/// Each row is independently selected with probability `(upper_bound - lower_bound)` +/// and appears at most once. +#[derive(Debug, Clone)] +pub struct SampleExec { + input: Arc, + lower_bound: f64, + upper_bound: f64, + seed: u64, + metrics: ExecutionPlanMetricsSet, + cache: PlanProperties, +} + +impl SampleExec { + /// Create a new SampleExec with Bernoulli sampling (without replacement). 
+ /// + /// # Arguments + /// * `input` - The input execution plan + /// * `lower_bound` - Lower bound of sampling range (typically 0.0) + /// * `upper_bound` - Upper bound of sampling range (0.0 to 1.0) + /// * `seed` - Random seed for reproducible sampling + pub fn try_new( + input: Arc, + lower_bound: f64, + upper_bound: f64, + seed: u64, + ) -> Result { + if lower_bound < 0.0 || upper_bound > 1.0 || lower_bound > upper_bound { + return internal_err!( + "Sampling bounds must satisfy 0.0 <= lower <= upper <= 1.0, got [{}, {}]", + lower_bound, + upper_bound + ); + } + + let cache = PlanProperties::new( + EquivalenceProperties::new(input.schema()), + input.properties().partitioning.clone(), + input.properties().emission_type, + input.properties().boundedness, + ); + + Ok(Self { + input, + lower_bound, + upper_bound, + seed, + metrics: ExecutionPlanMetricsSet::new(), + cache, + }) + } + + /// Create a sampler for the given partition. + fn create_sampler(&self, partition: usize) -> BernoulliSampler { + let seed = self.seed.wrapping_add(partition as u64); + BernoulliSampler::new(self.lower_bound, self.upper_bound, seed) + } +} + +impl DisplayAs for SampleExec { + fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> fmt::Result { + write!( + f, + "SampleExec: bounds=[{}, {}], seed={}", + self.lower_bound, self.upper_bound, self.seed + ) + } +} + +impl ExecutionPlan for SampleExec { + fn name(&self) -> &'static str { + "SampleExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.cache + } + + fn maintains_input_order(&self) -> Vec { + // Sampling preserves row order (rows are filtered, not reordered) + vec![true] + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + mut children: Vec>, + ) -> Result> { + Ok(Arc::new(Self::try_new( + children.swap_remove(0), + self.lower_bound, + self.upper_bound, + self.seed, + )?)) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + Ok(Box::pin(SampleStream { + input: self.input.execute(partition, context)?, + sampler: self.create_sampler(partition), + metrics: BaselineMetrics::new(&self.metrics, partition), + })) + } + + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } + + fn partition_statistics(&self, partition: Option) -> Result { + let mut stats = self.input.partition_statistics(partition)?; + let ratio = self.upper_bound - self.lower_bound; + + // Scale statistics by sampling ratio (inexact due to randomness) + stats.num_rows = stats + .num_rows + .map(|n| (n as f64 * ratio) as usize) + .to_inexact(); + stats.total_byte_size = stats + .total_byte_size + .map(|n| (n as f64 * ratio) as usize) + .to_inexact(); + + Ok(stats) + } +} + +/// Bernoulli sampler: includes each row with probability `(upper - lower)`. +/// This is sampling **without replacement** - each row appears at most once. 
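///
/// A minimal standalone sketch (the 0.5 upper bound and seed 7 are
/// illustrative; the single-column batch mirrors `register_sample_data`):
///
/// ```rust,ignore
/// use std::sync::Arc;
/// use arrow::array::{ArrayRef, Int32Array, RecordBatch};
/// use datafusion_common::Result;
///
/// fn demo() -> Result<()> {
///     let batch = RecordBatch::try_from_iter(vec![(
///         "v",
///         Arc::new(Int32Array::from((0..100).collect::<Vec<i32>>())) as ArrayRef,
///     )])?;
///     // Keep each row independently with probability ~0.5, reproducibly.
///     let mut sampler = BernoulliSampler::new(0.0, 0.5, 7);
///     let sampled = sampler.sample(&batch)?;
///     assert!(sampled.num_rows() <= batch.num_rows());
///     Ok(())
/// }
/// ```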
+struct BernoulliSampler { + lower_bound: f64, + upper_bound: f64, + rng: StdRng, +} + +impl BernoulliSampler { + fn new(lower_bound: f64, upper_bound: f64, seed: u64) -> Self { + Self { + lower_bound, + upper_bound, + rng: StdRng::seed_from_u64(seed), + } + } + + fn sample(&mut self, batch: &RecordBatch) -> Result { + let range = self.upper_bound - self.lower_bound; + if range <= 0.0 { + return Ok(RecordBatch::new_empty(batch.schema())); + } + + // Select rows where random value falls in [lower, upper) + let indices: Vec = (0..batch.num_rows()) + .filter(|_| { + let r: f64 = self.rng.random(); + r >= self.lower_bound && r < self.upper_bound + }) + .map(|i| i as u32) + .collect(); + + if indices.is_empty() { + return Ok(RecordBatch::new_empty(batch.schema())); + } + + compute::take_record_batch(batch, &UInt32Array::from(indices)) + .map_err(DataFusionError::from) + } +} + +/// Stream adapter that applies sampling to each batch. +struct SampleStream { + input: SendableRecordBatchStream, + sampler: BernoulliSampler, + metrics: BaselineMetrics, +} + +impl Stream for SampleStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match ready!(self.input.poll_next_unpin(cx)) { + Some(Ok(batch)) => { + let elapsed = self.metrics.elapsed_compute().clone(); + let _timer = elapsed.timer(); + let result = self.sampler.sample(&batch); + Poll::Ready(Some(result.record_output(&self.metrics))) + } + Some(Err(e)) => Poll::Ready(Some(Err(e))), + None => Poll::Ready(None), + } + } +} + +impl RecordBatchStream for SampleStream { + fn schema(&self) -> SchemaRef { + self.input.schema() + } +} diff --git a/datafusion-examples/examples/sql_dialect.rs b/datafusion-examples/examples/sql_dialect.rs deleted file mode 100644 index 20b515506f3b4..0000000000000 --- a/datafusion-examples/examples/sql_dialect.rs +++ /dev/null @@ -1,134 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::Display; - -use datafusion::error::{DataFusionError, Result}; -use datafusion::sql::{ - parser::{CopyToSource, CopyToStatement, DFParser, DFParserBuilder, Statement}, - sqlparser::{keywords::Keyword, tokenizer::Token}, -}; - -/// This example demonstrates how to use the DFParser to parse a statement in a custom way -/// -/// This technique can be used to implement a custom SQL dialect, for example. 
-#[tokio::main] -async fn main() -> Result<()> { - let mut my_parser = - MyParser::new("COPY source_table TO 'file.fasta' STORED AS FASTA")?; - - let my_statement = my_parser.parse_statement()?; - - match my_statement { - MyStatement::DFStatement(s) => println!("df: {s}"), - MyStatement::MyCopyTo(s) => println!("my_copy: {s}"), - } - - Ok(()) -} - -/// Here we define a Parser for our new SQL dialect that wraps the existing `DFParser` -struct MyParser<'a> { - df_parser: DFParser<'a>, -} - -impl<'a> MyParser<'a> { - fn new(sql: &'a str) -> Result { - let df_parser = DFParserBuilder::new(sql).build()?; - Ok(Self { df_parser }) - } - - /// Returns true if the next token is `COPY` keyword, false otherwise - fn is_copy(&self) -> bool { - matches!( - self.df_parser.parser.peek_token().token, - Token::Word(w) if w.keyword == Keyword::COPY - ) - } - - /// This is the entry point to our parser -- it handles `COPY` statements specially - /// but otherwise delegates to the existing DataFusion parser. - pub fn parse_statement(&mut self) -> Result { - if self.is_copy() { - self.df_parser.parser.next_token(); // COPY - let df_statement = self.df_parser.parse_copy()?; - - if let Statement::CopyTo(s) = df_statement { - Ok(MyStatement::from(s)) - } else { - Ok(MyStatement::DFStatement(Box::from(df_statement))) - } - } else { - let df_statement = self.df_parser.parse_statement()?; - Ok(MyStatement::from(df_statement)) - } - } -} - -enum MyStatement { - DFStatement(Box), - MyCopyTo(MyCopyToStatement), -} - -impl Display for MyStatement { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - MyStatement::DFStatement(s) => write!(f, "{s}"), - MyStatement::MyCopyTo(s) => write!(f, "{s}"), - } - } -} - -impl From for MyStatement { - fn from(s: Statement) -> Self { - Self::DFStatement(Box::from(s)) - } -} - -impl From for MyStatement { - fn from(s: CopyToStatement) -> Self { - if s.stored_as == Some("FASTA".to_string()) { - Self::MyCopyTo(MyCopyToStatement::from(s)) - } else { - Self::DFStatement(Box::from(Statement::CopyTo(s))) - } - } -} - -struct MyCopyToStatement { - pub source: CopyToSource, - pub target: String, -} - -impl From for MyCopyToStatement { - fn from(s: CopyToStatement) -> Self { - Self { - source: s.source, - target: s.target, - } - } -} - -impl Display for MyCopyToStatement { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "COPY {} TO '{}' STORED AS FASTA", - self.source, self.target - ) - } -} diff --git a/datafusion-examples/examples/sql_analysis.rs b/datafusion-examples/examples/sql_ops/analysis.rs similarity index 98% rename from datafusion-examples/examples/sql_analysis.rs rename to datafusion-examples/examples/sql_ops/analysis.rs index 4ff669faf1d0c..4243a2927865b 100644 --- a/datafusion-examples/examples/sql_analysis.rs +++ b/datafusion-examples/examples/sql_ops/analysis.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example shows how to use the structures that DataFusion provides to perform //! Analysis on SQL queries and their plans. //! 
@@ -23,8 +25,8 @@ use std::sync::Arc; -use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion::common::Result; +use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion::logical_expr::LogicalPlan; use datafusion::{ datasource::MemTable, @@ -32,141 +34,9 @@ use datafusion::{ }; use test_utils::tpcds::tpcds_schemas; -/// Counts the total number of joins in a plan -fn total_join_count(plan: &LogicalPlan) -> usize { - let mut total = 0; - - // We can use the TreeNode API to walk over a LogicalPlan. - plan.apply(|node| { - // if we encounter a join we update the running count - if matches!(node, LogicalPlan::Join(_)) { - total += 1; - } - Ok(TreeNodeRecursion::Continue) - }) - .unwrap(); - - total -} - -/// Counts the total number of joins in a plan and collects every join tree in -/// the plan with their respective join count. -/// -/// Join Tree Definition: the largest subtree consisting entirely of joins -/// -/// For example, this plan: -/// -/// ```text -/// JOIN -/// / \ -/// A JOIN -/// / \ -/// B C -/// ``` -/// -/// has a single join tree `(A-B-C)` which will result in `(2, [2])` -/// -/// This plan: -/// -/// ```text -/// JOIN -/// / \ -/// A GROUP -/// | -/// JOIN -/// / \ -/// B C -/// ``` -/// -/// Has two join trees `(A-, B-C)` which will result in `(2, [1, 1])` -fn count_trees(plan: &LogicalPlan) -> (usize, Vec) { - // this works the same way as `total_count`, but now when we encounter a Join - // we try to collect it's entire tree - let mut to_visit = vec![plan]; - let mut total = 0; - let mut groups = vec![]; - - while let Some(node) = to_visit.pop() { - // if we encounter a join, we know were at the root of the tree - // count this tree and recurse on it's inputs - if matches!(node, LogicalPlan::Join(_)) { - let (group_count, inputs) = count_tree(node); - total += group_count; - groups.push(group_count); - to_visit.extend(inputs); - } else { - to_visit.extend(node.inputs()); - } - } - - (total, groups) -} - -/// Count the entire join tree and return its inputs using TreeNode API -/// -/// For example, if this function receives following plan: -/// -/// ```text -/// JOIN -/// / \ -/// A GROUP -/// | -/// JOIN -/// / \ -/// B C -/// ``` -/// -/// It will return `(1, [A, GROUP])` -fn count_tree(join: &LogicalPlan) -> (usize, Vec<&LogicalPlan>) { - let mut inputs = Vec::new(); - let mut total = 0; - - join.apply(|node| { - // Some extra knowledge: - // - // optimized plans have their projections pushed down as far as - // possible, which sometimes results in a projection going in between 2 - // subsequent joins giving the illusion these joins are not "related", - // when in fact they are. - // - // This plan: - // JOIN - // / \ - // A PROJECTION - // | - // JOIN - // / \ - // B C - // - // is the same as: - // - // JOIN - // / \ - // A JOIN - // / \ - // B C - // we can continue the recursion in this case - if let LogicalPlan::Projection(_) = node { - return Ok(TreeNodeRecursion::Continue); - } - - // any join we count - if matches!(node, LogicalPlan::Join(_)) { - total += 1; - Ok(TreeNodeRecursion::Continue) - } else { - inputs.push(node); - // skip children of input node - Ok(TreeNodeRecursion::Jump) - } - }) - .unwrap(); - - (total, inputs) -} - -#[tokio::main] -async fn main() -> Result<()> { +/// Demonstrates how to analyze a SQL query by counting JOINs and identifying +/// join-trees using DataFusion’s `LogicalPlan` and `TreeNode` API. 
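///
/// The core walking pattern, reduced to a minimal sketch (`total_join_count`
/// below is the full version used by this example):
///
/// ```rust,ignore
/// use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion};
/// use datafusion::logical_expr::LogicalPlan;
///
/// fn count_joins(plan: &LogicalPlan) -> usize {
///     let mut joins = 0;
///     // `apply` visits every node of the plan tree; we only inspect it.
///     plan.apply(|node| {
///         if matches!(node, LogicalPlan::Join(_)) {
///             joins += 1;
///         }
///         Ok(TreeNodeRecursion::Continue)
///     })
///     .expect("the closure never returns an error");
///     joins
/// }
/// ```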
+pub async fn analysis() -> Result<()> { // To show how we can count the joins in a sql query we'll be using query 88 // from the TPC-DS benchmark. // @@ -310,3 +180,136 @@ from Ok(()) } + +/// Counts the total number of joins in a plan +fn total_join_count(plan: &LogicalPlan) -> usize { + let mut total = 0; + + // We can use the TreeNode API to walk over a LogicalPlan. + plan.apply(|node| { + // if we encounter a join we update the running count + if matches!(node, LogicalPlan::Join(_)) { + total += 1; + } + Ok(TreeNodeRecursion::Continue) + }) + .unwrap(); + + total +} + +/// Counts the total number of joins in a plan and collects every join tree in +/// the plan with their respective join count. +/// +/// Join Tree Definition: the largest subtree consisting entirely of joins +/// +/// For example, this plan: +/// +/// ```text +/// JOIN +/// / \ +/// A JOIN +/// / \ +/// B C +/// ``` +/// +/// has a single join tree `(A-B-C)` which will result in `(2, [2])` +/// +/// This plan: +/// +/// ```text +/// JOIN +/// / \ +/// A GROUP +/// | +/// JOIN +/// / \ +/// B C +/// ``` +/// +/// Has two join trees `(A-, B-C)` which will result in `(2, [1, 1])` +fn count_trees(plan: &LogicalPlan) -> (usize, Vec) { + // this works the same way as `total_count`, but now when we encounter a Join + // we try to collect it's entire tree + let mut to_visit = vec![plan]; + let mut total = 0; + let mut groups = vec![]; + + while let Some(node) = to_visit.pop() { + // if we encounter a join, we know were at the root of the tree + // count this tree and recurse on it's inputs + if matches!(node, LogicalPlan::Join(_)) { + let (group_count, inputs) = count_tree(node); + total += group_count; + groups.push(group_count); + to_visit.extend(inputs); + } else { + to_visit.extend(node.inputs()); + } + } + + (total, groups) +} + +/// Count the entire join tree and return its inputs using TreeNode API +/// +/// For example, if this function receives following plan: +/// +/// ```text +/// JOIN +/// / \ +/// A GROUP +/// | +/// JOIN +/// / \ +/// B C +/// ``` +/// +/// It will return `(1, [A, GROUP])` +fn count_tree(join: &LogicalPlan) -> (usize, Vec<&LogicalPlan>) { + let mut inputs = Vec::new(); + let mut total = 0; + + join.apply(|node| { + // Some extra knowledge: + // + // optimized plans have their projections pushed down as far as + // possible, which sometimes results in a projection going in between 2 + // subsequent joins giving the illusion these joins are not "related", + // when in fact they are. + // + // This plan: + // JOIN + // / \ + // A PROJECTION + // | + // JOIN + // / \ + // B C + // + // is the same as: + // + // JOIN + // / \ + // A JOIN + // / \ + // B C + // we can continue the recursion in this case + if let LogicalPlan::Projection(_) = node { + return Ok(TreeNodeRecursion::Continue); + } + + // any join we count + if matches!(node, LogicalPlan::Join(_)) { + total += 1; + Ok(TreeNodeRecursion::Continue) + } else { + inputs.push(node); + // skip children of input node + Ok(TreeNodeRecursion::Jump) + } + }) + .unwrap(); + + (total, inputs) +} diff --git a/datafusion-examples/examples/sql_ops/custom_sql_parser.rs b/datafusion-examples/examples/sql_ops/custom_sql_parser.rs new file mode 100644 index 0000000000000..308a0de62a242 --- /dev/null +++ b/datafusion-examples/examples/sql_ops/custom_sql_parser.rs @@ -0,0 +1,420 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This example demonstrates extending the DataFusion SQL parser to support +//! custom DDL statements, specifically `CREATE EXTERNAL CATALOG`. +//! +//! ### Custom Syntax +//! ```sql +//! CREATE EXTERNAL CATALOG my_catalog +//! STORED AS ICEBERG +//! LOCATION 's3://my-bucket/warehouse/' +//! OPTIONS ( +//! 'region' = 'us-west-2' +//! ); +//! ``` +//! +//! Note: For the purpose of this example, we use `local://workspace/` to +//! automatically discover and register files from the project's test data. + +use std::collections::HashMap; +use std::fmt::Display; +use std::sync::Arc; + +use datafusion::catalog::{ + CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, + TableProviderFactory, +}; +use datafusion::datasource::listing_table_factory::ListingTableFactory; +use datafusion::error::{DataFusionError, Result}; +use datafusion::prelude::SessionContext; +use datafusion::sql::{ + parser::{DFParser, DFParserBuilder, Statement}, + sqlparser::{ + ast::{ObjectName, Value}, + keywords::Keyword, + tokenizer::Token, + }, +}; +use datafusion_common::{DFSchema, TableReference, plan_datafusion_err, plan_err}; +use datafusion_expr::CreateExternalTable; +use futures::StreamExt; +use insta::assert_snapshot; +use object_store::ObjectStore; +use object_store::local::LocalFileSystem; + +/// Entry point for the example. +pub async fn custom_sql_parser() -> Result<()> { + // Use standard Parquet testing data as our "external" source. 
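    // `parquet_test_data()` resolves to the repository's `parquet-testing/data`
    // directory; below it is made relative to the workspace root so the files
    // can later be addressed through the `local://workspace` object store
    // registered in `handle_create_external_catalog`.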
+ let base_path = datafusion::common::test_util::parquet_test_data(); + let base_path = std::path::Path::new(&base_path).canonicalize()?; + + // Make the path relative to the workspace root + let workspace_root = workspace_root(); + let location = base_path + .strip_prefix(&workspace_root) + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|_| base_path.to_string_lossy().to_string()); + + let create_catalog_sql = format!( + "CREATE EXTERNAL CATALOG parquet_testing + STORED AS parquet + LOCATION 'local://workspace/{location}' + OPTIONS ( + 'schema_name' = 'staged_data', + 'format.pruning' = 'true' + )" + ); + + // ========================================================================= + // Part 1: Standard DataFusion parser rejects the custom DDL + // ========================================================================= + println!("=== Part 1: Standard DataFusion Parser ===\n"); + println!("Parsing: {}\n", create_catalog_sql.trim()); + + let ctx_standard = SessionContext::new(); + let err = ctx_standard + .sql(&create_catalog_sql) + .await + .expect_err("Expected the standard parser to reject CREATE EXTERNAL CATALOG (custom DDL syntax)"); + + println!("Error: {err}\n"); + assert_snapshot!(err.to_string(), @r#"SQL error: ParserError("Expected: TABLE, found: CATALOG at Line: 1, Column: 17")"#); + + // ========================================================================= + // Part 2: Custom parser handles the statement + // ========================================================================= + println!("=== Part 2: Custom Parser ===\n"); + println!("Parsing: {}\n", create_catalog_sql.trim()); + + let ctx = SessionContext::new(); + + let mut parser = CustomParser::new(&create_catalog_sql)?; + let statement = parser.parse_statement()?; + match statement { + CustomStatement::CreateExternalCatalog(stmt) => { + handle_create_external_catalog(&ctx, stmt).await?; + } + CustomStatement::DFStatement(_) => { + panic!("Expected CreateExternalCatalog statement"); + } + } + + // Query a table from the registered catalog + let query_sql = "SELECT id, bool_col, tinyint_col FROM parquet_testing.staged_data.alltypes_plain LIMIT 5"; + println!("Executing: {query_sql}\n"); + + let results = execute_sql(&ctx, query_sql).await?; + println!("{results}"); + assert_snapshot!(results, @r" + +----+----------+-------------+ + | id | bool_col | tinyint_col | + +----+----------+-------------+ + | 4 | true | 0 | + | 5 | false | 1 | + | 6 | true | 0 | + | 7 | false | 1 | + | 2 | true | 0 | + +----+----------+-------------+ + "); + + Ok(()) +} + +/// Execute SQL and return formatted results. +async fn execute_sql(ctx: &SessionContext, sql: &str) -> Result { + let batches = ctx.sql(sql).await?.collect().await?; + Ok(arrow::util::pretty::pretty_format_batches(&batches)?.to_string()) +} + +/// Custom handler for the `CREATE EXTERNAL CATALOG` statement. 
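///
/// Invocation sketch, mirroring Part 2 above (the DDL text is illustrative and
/// assumes the data files live under the registered `local://workspace` store):
///
/// ```rust,ignore
/// use datafusion::prelude::SessionContext;
/// use datafusion_common::Result;
///
/// async fn demo(ctx: &SessionContext) -> Result<()> {
///     let sql = "CREATE EXTERNAL CATALOG demo \
///                STORED AS parquet \
///                LOCATION 'local://workspace/parquet-testing/data'";
///     let mut parser = CustomParser::new(sql)?;
///     if let CustomStatement::CreateExternalCatalog(stmt) = parser.parse_statement()? {
///         handle_create_external_catalog(ctx, stmt).await?;
///     }
///     Ok(())
/// }
/// ```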
+async fn handle_create_external_catalog( + ctx: &SessionContext, + stmt: CreateExternalCatalog, +) -> Result<()> { + let factory = ListingTableFactory::new(); + let catalog = Arc::new(MemoryCatalogProvider::new()); + let schema = Arc::new(MemorySchemaProvider::new()); + + // Extract options + let mut schema_name = "public".to_string(); + let mut table_options = HashMap::new(); + + for (k, v) in stmt.options { + let val_str = match v { + Value::SingleQuotedString(ref s) | Value::DoubleQuotedString(ref s) => { + s.to_string() + } + Value::Number(ref n, _) => n.to_string(), + Value::Boolean(b) => b.to_string(), + _ => v.to_string(), + }; + + if k == "schema_name" { + schema_name = val_str; + } else { + table_options.insert(k, val_str); + } + } + + println!(" Target Catalog: {}", stmt.name); + println!(" Data Location: {}", stmt.location); + println!(" Resolved Schema: {schema_name}"); + + // Register a local object store rooted at the workspace root. + // We use a specific authority 'workspace' to ensure consistent resolution. + let store = Arc::new(LocalFileSystem::new_with_prefix(workspace_root())?); + let store_url = url::Url::parse("local://workspace").unwrap(); + ctx.register_object_store(&store_url, Arc::clone(&store) as _); + + let target_ext = format!(".{}", stmt.catalog_type.to_lowercase()); + + // For 'local://workspace/parquet-testing/data', the path is 'parquet-testing/data'. + let path_str = stmt + .location + .strip_prefix("local://workspace/") + .unwrap_or(&stmt.location); + let prefix = object_store::path::Path::from(path_str); + + // Discover data files using the ObjectStore API + let mut table_count = 0; + let mut list_stream = store.list(Some(&prefix)); + + while let Some(meta) = list_stream.next().await { + let meta = meta?; + let path = &meta.location; + + if path.as_ref().ends_with(&target_ext) { + let name = std::path::Path::new(path.as_ref()) + .file_stem() + .unwrap() + .to_string_lossy() + .to_string(); + + let table_url = format!("local://workspace/{path}"); + + let cmd = CreateExternalTable::builder( + TableReference::bare(name.clone()), + table_url, + stmt.catalog_type.clone(), + Arc::new(DFSchema::empty()), + ) + .with_options(table_options.clone()) + .build(); + + match factory.create(&ctx.state(), &cmd).await { + Ok(table) => { + schema.register_table(name, table)?; + table_count += 1; + } + Err(e) => { + eprintln!("Failed to create table {name}: {e}"); + } + } + } + } + println!(" Registered {table_count} tables into schema: {schema_name}"); + + catalog.register_schema(&schema_name, schema)?; + ctx.register_catalog(stmt.name.to_string(), catalog); + + Ok(()) +} + +/// Possible statements returned by our custom parser. +#[derive(Debug, Clone)] +pub enum CustomStatement { + /// Standard DataFusion statement + DFStatement(Box), + /// Custom `CREATE EXTERNAL CATALOG` statement + CreateExternalCatalog(CreateExternalCatalog), +} + +/// Data structure for `CREATE EXTERNAL CATALOG`. 
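+///
+/// Field mapping to the DDL: `name` is the identifier after
+/// `CREATE EXTERNAL CATALOG`, `catalog_type` comes from `STORED AS`,
+/// `location` from `LOCATION`, and `options` from the `OPTIONS (...)` list.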
+#[derive(Debug, Clone)] +pub struct CreateExternalCatalog { + pub name: ObjectName, + pub catalog_type: String, + pub location: String, + pub options: Vec<(String, Value)>, +} + +impl Display for CustomStatement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::DFStatement(s) => write!(f, "{s}"), + Self::CreateExternalCatalog(s) => write!(f, "{s}"), + } + } +} + +impl Display for CreateExternalCatalog { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "CREATE EXTERNAL CATALOG {} STORED AS {} LOCATION '{}'", + self.name, self.catalog_type, self.location + )?; + if !self.options.is_empty() { + write!(f, " OPTIONS (")?; + for (i, (k, v)) in self.options.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "'{k}' = '{v}'")?; + } + write!(f, ")")?; + } + Ok(()) + } +} + +/// A parser that extends `DFParser` with custom syntax. +struct CustomParser<'a> { + df_parser: DFParser<'a>, +} + +impl<'a> CustomParser<'a> { + fn new(sql: &'a str) -> Result { + Ok(Self { + df_parser: DFParserBuilder::new(sql).build()?, + }) + } + + pub fn parse_statement(&mut self) -> Result { + if self.is_create_external_catalog() { + return self.parse_create_external_catalog(); + } + Ok(CustomStatement::DFStatement(Box::new( + self.df_parser.parse_statement()?, + ))) + } + + fn is_create_external_catalog(&self) -> bool { + let t1 = &self.df_parser.parser.peek_nth_token(0).token; + let t2 = &self.df_parser.parser.peek_nth_token(1).token; + let t3 = &self.df_parser.parser.peek_nth_token(2).token; + + matches!(t1, Token::Word(w) if w.keyword == Keyword::CREATE) + && matches!(t2, Token::Word(w) if w.keyword == Keyword::EXTERNAL) + && matches!(t3, Token::Word(w) if w.value.to_uppercase() == "CATALOG") + } + + fn parse_create_external_catalog(&mut self) -> Result { + // Consume prefix tokens: CREATE EXTERNAL CATALOG + for _ in 0..3 { + self.df_parser.parser.next_token(); + } + + let name = self + .df_parser + .parser + .parse_object_name(false) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + let mut catalog_type = None; + let mut location = None; + let mut options = vec![]; + + while let Some(keyword) = self.df_parser.parser.parse_one_of_keywords(&[ + Keyword::STORED, + Keyword::LOCATION, + Keyword::OPTIONS, + ]) { + match keyword { + Keyword::STORED => { + if catalog_type.is_some() { + return plan_err!("Duplicate STORED AS"); + } + self.df_parser + .parser + .expect_keyword(Keyword::AS) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + catalog_type = Some( + self.df_parser + .parser + .parse_identifier() + .map_err(|e| DataFusionError::External(Box::new(e)))? + .value, + ); + } + Keyword::LOCATION => { + if location.is_some() { + return plan_err!("Duplicate LOCATION"); + } + location = Some( + self.df_parser + .parser + .parse_literal_string() + .map_err(|e| DataFusionError::External(Box::new(e)))?, + ); + } + Keyword::OPTIONS => { + if !options.is_empty() { + return plan_err!("Duplicate OPTIONS"); + } + options = self.parse_value_options()?; + } + _ => unreachable!(), + } + } + + Ok(CustomStatement::CreateExternalCatalog( + CreateExternalCatalog { + name, + catalog_type: catalog_type + .ok_or_else(|| plan_datafusion_err!("Missing STORED AS"))?, + location: location + .ok_or_else(|| plan_datafusion_err!("Missing LOCATION"))?, + options, + }, + )) + } + + /// Parse options in the form: (key [=] value, key [=] value, ...) 
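+    ///
+    /// The `=` between key and value is optional, so, for example, both
+    /// `('region' = 'us-west-2')` and `('region' 'us-west-2')` are accepted.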
+ fn parse_value_options(&mut self) -> Result> { + let mut options = vec![]; + self.df_parser + .parser + .expect_token(&Token::LParen) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + loop { + let key = self.df_parser.parse_option_key()?; + // Support optional '=' between key and value + let _ = self.df_parser.parser.consume_token(&Token::Eq); + let value = self.df_parser.parse_option_value()?; + options.push((key, value)); + + let comma = self.df_parser.parser.consume_token(&Token::Comma); + if self.df_parser.parser.consume_token(&Token::RParen) { + break; + } else if !comma { + return plan_err!("Expected ',' or ')' in OPTIONS"); + } + } + Ok(options) + } +} + +/// Returns the workspace root directory (parent of datafusion-examples). +fn workspace_root() -> std::path::PathBuf { + std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("CARGO_MANIFEST_DIR should have a parent") + .to_path_buf() +} diff --git a/datafusion-examples/examples/sql_frontend.rs b/datafusion-examples/examples/sql_ops/frontend.rs similarity index 98% rename from datafusion-examples/examples/sql_frontend.rs rename to datafusion-examples/examples/sql_ops/frontend.rs index 1fc9ce24ecbb5..025fe47e75b07 100644 --- a/datafusion-examples/examples/sql_frontend.rs +++ b/datafusion-examples/examples/sql_ops/frontend.rs @@ -15,8 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::common::{plan_err, TableReference}; +use datafusion::common::{TableReference, plan_err}; use datafusion::config::ConfigOptions; use datafusion::error::Result; use datafusion::logical_expr::{ @@ -44,7 +46,7 @@ use std::sync::Arc; /// /// In this example, we demonstrate how to use the lower level APIs directly, /// which only requires the `datafusion-sql` dependency. -pub fn main() -> Result<()> { +pub fn frontend() -> Result<()> { // First, we parse the SQL string. Note that we use the DataFusion // Parser, which wraps the `sqlparser-rs` SQL parser and adds DataFusion // specific syntax such as `CREATE EXTERNAL TABLE` diff --git a/datafusion-examples/examples/sql_ops/main.rs b/datafusion-examples/examples/sql_ops/main.rs new file mode 100644 index 0000000000000..8c3ac056698b7 --- /dev/null +++ b/datafusion-examples/examples/sql_ops/main.rs @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # SQL Examples +//! +//! These examples demonstrate SQL operations in DataFusion. +//! +//! ## Usage +//! ```bash +//! cargo run --example sql_ops -- [all|analysis|custom_sql_parser|frontend|query] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! 
- `all` — run all examples included in this module +//! - `analysis` — analyse SQL queries with DataFusion structures +//! - `custom_sql_parser` — implementing a custom SQL parser to extend DataFusion +//! - `frontend` — create LogicalPlans (only) from sql strings +//! - `query` — query data using SQL (in memory RecordBatches, local Parquet files) + +mod analysis; +mod custom_sql_parser; +mod frontend; +mod query; + +use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; + +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] +enum ExampleKind { + All, + Analysis, + CustomSqlParser, + Frontend, + Query, +} + +impl ExampleKind { + const EXAMPLE_NAME: &str = "sql_ops"; + + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) + } + + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::Analysis => analysis::analysis().await?, + ExampleKind::CustomSqlParser => { + custom_sql_parser::custom_sql_parser().await? + } + ExampleKind::Frontend => frontend::frontend()?, + ExampleKind::Query => query::query().await?, + } + Ok(()) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::VARIANTS.join("|") + ); + + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; + + example.run().await +} diff --git a/datafusion-examples/examples/sql_query.rs b/datafusion-examples/examples/sql_ops/query.rs similarity index 97% rename from datafusion-examples/examples/sql_query.rs rename to datafusion-examples/examples/sql_ops/query.rs index 4da07d33d03d4..90d0c3ca34a00 100644 --- a/datafusion-examples/examples/sql_query.rs +++ b/datafusion-examples/examples/sql_ops/query.rs @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. -use datafusion::arrow::array::{UInt64Array, UInt8Array}; +//! See `main.rs` for how to run it. 
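+//!
+//! Per the `sql_ops` usage in `main.rs`, this file corresponds to the `query`
+//! subcommand: `cargo run --example sql_ops -- query`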
+ +use datafusion::arrow::array::{UInt8Array, UInt64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::common::{assert_batches_eq, exec_datafusion_err}; +use datafusion::datasource::MemTable; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::ListingOptions; -use datafusion::datasource::MemTable; use datafusion::error::{DataFusionError, Result}; use datafusion::prelude::*; use object_store::local::LocalFileSystem; @@ -32,8 +34,7 @@ use std::sync::Arc; /// /// [`query_memtable`]: a simple query against a [`MemTable`] /// [`query_parquet`]: a simple query against a directory with multiple Parquet files -#[tokio::main] -async fn main() -> Result<()> { +pub async fn query() -> Result<()> { query_memtable().await?; query_parquet().await?; Ok(()) @@ -152,7 +153,8 @@ async fn query_parquet() -> Result<()> { "| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |", "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", ], - &results); + &results + ); // Second example were we temporarily move into the test data's parent directory and // simulate a relative path, this requires registering an ObjectStore. @@ -201,7 +203,8 @@ async fn query_parquet() -> Result<()> { "| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |", "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", ], - &results); + &results + ); // Reset the current directory std::env::set_current_dir(cur_dir)?; diff --git a/datafusion-examples/examples/udf/advanced_udaf.rs b/datafusion-examples/examples/udf/advanced_udaf.rs index 81e227bfacee4..fbb9e652486ce 100644 --- a/datafusion-examples/examples/udf/advanced_udaf.rs +++ b/datafusion-examples/examples/udf/advanced_udaf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::{Field, Schema}; use datafusion::physical_expr::NullState; use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; @@ -26,13 +28,13 @@ use arrow::array::{ use arrow::datatypes::{ArrowNativeTypeOp, ArrowPrimitiveType, Float64Type, UInt32Type}; use arrow::record_batch::RecordBatch; use arrow_schema::FieldRef; -use datafusion::common::{cast::as_float64_array, ScalarValue}; +use datafusion::common::{ScalarValue, cast::as_float64_array}; use datafusion::error::Result; use datafusion::logical_expr::{ + Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, expr::AggregateFunction, function::{AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs}, simplify::SimplifyInfo, - Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, }; use datafusion::prelude::*; diff --git a/datafusion-examples/examples/udf/advanced_udf.rs b/datafusion-examples/examples/udf/advanced_udf.rs index bb5a68e90cbbe..a00a7e7df434f 100644 --- a/datafusion-examples/examples/udf/advanced_udf.rs +++ b/datafusion-examples/examples/udf/advanced_udf.rs @@ -15,19 +15,21 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
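+//!
+//! Per the `udf` usage in `main.rs`, this file corresponds to the `adv_udf`
+//! subcommand: `cargo run --example udf -- adv_udf`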
+ use std::any::Any; use std::sync::Arc; use arrow::array::{ - new_null_array, Array, ArrayRef, AsArray, Float32Array, Float64Array, + Array, ArrayRef, AsArray, Float32Array, Float64Array, new_null_array, }; use arrow::compute; use arrow::datatypes::{DataType, Float64Type}; use arrow::record_batch::RecordBatch; -use datafusion::common::{exec_err, internal_err, ScalarValue}; +use datafusion::common::{ScalarValue, exec_err, internal_err}; use datafusion::error::Result; -use datafusion::logical_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion::logical_expr::Volatility; +use datafusion::logical_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, }; diff --git a/datafusion-examples/examples/udf/advanced_udwf.rs b/datafusion-examples/examples/udf/advanced_udwf.rs index 86f215e019c78..e8d3a75b29dec 100644 --- a/datafusion-examples/examples/udf/advanced_udwf.rs +++ b/datafusion-examples/examples/udf/advanced_udwf.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + +use std::{any::Any, fs::File, io::Write, sync::Arc}; + use arrow::datatypes::Field; use arrow::{ array::{ArrayRef, AsArray, Float64Array}, @@ -36,8 +40,7 @@ use datafusion::logical_expr::{ use datafusion::physical_expr::PhysicalExpr; use datafusion::prelude::*; use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; -use std::any::Any; -use std::sync::Arc; +use tempfile::tempdir; /// This example shows how to use the full WindowUDFImpl API to implement a user /// defined window function. As in the `simple_udwf.rs` example, this struct implements @@ -227,12 +230,46 @@ async fn create_context() -> Result { // declare a new context. In spark API, this corresponds to a new spark SQL session let ctx = SessionContext::new(); - // declare a table in memory. In spark API, this corresponds to createDataFrame(...). 
- println!("pwd: {}", std::env::current_dir().unwrap().display()); - let csv_path = "../../datafusion/core/tests/data/cars.csv".to_string(); - let read_options = CsvReadOptions::default().has_header(true); + // content from file 'datafusion/core/tests/data/cars.csv' + let csv_data = r#"car,speed,time +red,20.0,1996-04-12T12:05:03.000000000 +red,20.3,1996-04-12T12:05:04.000000000 +red,21.4,1996-04-12T12:05:05.000000000 +red,21.5,1996-04-12T12:05:06.000000000 +red,19.0,1996-04-12T12:05:07.000000000 +red,18.0,1996-04-12T12:05:08.000000000 +red,17.0,1996-04-12T12:05:09.000000000 +red,7.0,1996-04-12T12:05:10.000000000 +red,7.1,1996-04-12T12:05:11.000000000 +red,7.2,1996-04-12T12:05:12.000000000 +red,3.0,1996-04-12T12:05:13.000000000 +red,1.0,1996-04-12T12:05:14.000000000 +red,0.0,1996-04-12T12:05:15.000000000 +green,10.0,1996-04-12T12:05:03.000000000 +green,10.3,1996-04-12T12:05:04.000000000 +green,10.4,1996-04-12T12:05:05.000000000 +green,10.5,1996-04-12T12:05:06.000000000 +green,11.0,1996-04-12T12:05:07.000000000 +green,12.0,1996-04-12T12:05:08.000000000 +green,14.0,1996-04-12T12:05:09.000000000 +green,15.0,1996-04-12T12:05:10.000000000 +green,15.1,1996-04-12T12:05:11.000000000 +green,15.2,1996-04-12T12:05:12.000000000 +green,8.0,1996-04-12T12:05:13.000000000 +green,2.0,1996-04-12T12:05:14.000000000 +"#; + let dir = tempdir()?; + let file_path = dir.path().join("cars.csv"); + { + let mut file = File::create(&file_path)?; + // write CSV data + file.write_all(csv_data.as_bytes())?; + } // scope closes the file + let file_path = file_path.to_str().unwrap(); + + ctx.register_csv("cars", file_path, CsvReadOptions::new()) + .await?; - ctx.register_csv("cars", &csv_path, read_options).await?; Ok(ctx) } diff --git a/datafusion-examples/examples/udf/async_udf.rs b/datafusion-examples/examples/udf/async_udf.rs index 475775a599f62..c31e8290ccce5 100644 --- a/datafusion-examples/examples/udf/async_udf.rs +++ b/datafusion-examples/examples/udf/async_udf.rs @@ -15,12 +15,16 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example shows how to create and use "Async UDFs" in DataFusion. //! //! Async UDFs allow you to perform asynchronous operations, such as //! making network requests. This can be used for tasks like fetching //! data from an external API such as a LLM service or an external database. 
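+//!
+//! For illustration only (names taken from the query plan shown later in this
+//! file; the exact statement in the code may differ):
+//!
+//! ```sql
+//! SELECT * FROM animal a WHERE ask_llm(a.name, 'Is this animal furry?')
+//! ```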
+use std::{any::Any, sync::Arc}; + use arrow::array::{ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use async_trait::async_trait; @@ -35,8 +39,6 @@ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; use datafusion::prelude::{SessionConfig, SessionContext}; -use std::any::Any; -use std::sync::Arc; /// In this example we register `AskLLM` as an asynchronous user defined function /// and invoke it via the DataFrame API and SQL @@ -91,20 +93,19 @@ pub async fn async_udf() -> Result<()> { assert_batches_eq!( [ - "+---------------+--------------------------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+--------------------------------------------------------------------------------------------------------------------------------+", - "| logical_plan | SubqueryAlias: a |", - "| | Filter: ask_llm(CAST(animal.name AS Utf8View), Utf8View(\"Is this animal furry?\")) |", - "| | TableScan: animal projection=[id, name] |", - "| physical_plan | CoalesceBatchesExec: target_batch_size=8192 |", - "| | FilterExec: __async_fn_0@2, projection=[id@0, name@1] |", - "| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |", - "| | AsyncFuncExec: async_expr=[async_expr(name=__async_fn_0, expr=ask_llm(CAST(name@1 AS Utf8View), Is this animal furry?))] |", - "| | CoalesceBatchesExec: target_batch_size=8192 |", - "| | DataSourceExec: partitions=1, partition_sizes=[1] |", - "| | |", - "+---------------+--------------------------------------------------------------------------------------------------------------------------------+", + "+---------------+------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | SubqueryAlias: a |", + "| | Filter: ask_llm(CAST(animal.name AS Utf8View), Utf8View(\"Is this animal furry?\")) |", + "| | TableScan: animal projection=[id, name] |", + "| physical_plan | FilterExec: __async_fn_0@2, projection=[id@0, name@1] |", + "| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |", + "| | AsyncFuncExec: async_expr=[async_expr(name=__async_fn_0, expr=ask_llm(CAST(name@1 AS Utf8View), Is this animal furry?))] |", + "| | CoalesceBatchesExec: target_batch_size=8192 |", + "| | DataSourceExec: partitions=1, partition_sizes=[1] |", + "| | |", + "+---------------+------------------------------------------------------------------------------------------------------------------------------+", ], &results ); diff --git a/datafusion-examples/examples/udf/main.rs b/datafusion-examples/examples/udf/main.rs index ba36dbb15c58b..0fb26ff5f74ce 100644 --- a/datafusion-examples/examples/udf/main.rs +++ b/datafusion-examples/examples/udf/main.rs @@ -19,7 +19,13 @@ //! //! These examples demonstrate user-defined functions in DataFusion. //! +//! ## Usage +//! ```bash +//! cargo run --example udf -- [all|adv_udaf|adv_udf|adv_udwf|async_udf|udaf|udf|udtf|udwf] +//! ``` +//! //! Each subcommand runs a corresponding example: +//! - `all` — run all examples included in this module //! - `adv_udaf` — user defined aggregate function example //! - `adv_udf` — user defined scalar function example //! 
- `adv_udwf` — user defined window function example @@ -38,11 +44,14 @@ mod simple_udf; mod simple_udtf; mod simple_udwf; -use std::str::FromStr; - use datafusion::error::{DataFusionError, Result}; +use strum::{IntoEnumIterator, VariantNames}; +use strum_macros::{Display, EnumIter, EnumString, VariantNames}; +#[derive(EnumIter, EnumString, Display, VariantNames)] +#[strum(serialize_all = "snake_case")] enum ExampleKind { + All, AdvUdaf, AdvUdf, AdvUdwf, @@ -53,55 +62,32 @@ enum ExampleKind { Udtf, } -impl AsRef for ExampleKind { - fn as_ref(&self) -> &str { - match self { - Self::AdvUdaf => "adv_udaf", - Self::AdvUdf => "adv_udf", - Self::AdvUdwf => "adv_udwf", - Self::AsyncUdf => "async_udf", - Self::Udf => "udf", - Self::Udaf => "udaf", - Self::Udwf => "udwt", - Self::Udtf => "udtf", - } - } -} - -impl FromStr for ExampleKind { - type Err = DataFusionError; +impl ExampleKind { + const EXAMPLE_NAME: &str = "udf"; - fn from_str(s: &str) -> Result { - match s { - "adv_udaf" => Ok(Self::AdvUdaf), - "adv_udf" => Ok(Self::AdvUdf), - "adv_udwf" => Ok(Self::AdvUdwf), - "async_udf" => Ok(Self::AsyncUdf), - "udaf" => Ok(Self::Udaf), - "udf" => Ok(Self::Udf), - "udtf" => Ok(Self::Udtf), - "udwf" => Ok(Self::Udwf), - _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), - } + fn runnable() -> impl Iterator { + ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All)) } -} -impl ExampleKind { - const ALL: [Self; 8] = [ - Self::AdvUdaf, - Self::AdvUdf, - Self::AdvUdwf, - Self::AsyncUdf, - Self::Udaf, - Self::Udf, - Self::Udtf, - Self::Udwf, - ]; - - const EXAMPLE_NAME: &str = "udf"; + async fn run(&self) -> Result<()> { + match self { + ExampleKind::All => { + for example in ExampleKind::runnable() { + println!("Running example: {example}"); + Box::pin(example.run()).await?; + } + } + ExampleKind::AdvUdaf => advanced_udaf::advanced_udaf().await?, + ExampleKind::AdvUdf => advanced_udf::advanced_udf().await?, + ExampleKind::AdvUdwf => advanced_udwf::advanced_udwf().await?, + ExampleKind::AsyncUdf => async_udf::async_udf().await?, + ExampleKind::Udaf => simple_udaf::simple_udaf().await?, + ExampleKind::Udf => simple_udf::simple_udf().await?, + ExampleKind::Udtf => simple_udtf::simple_udtf().await?, + ExampleKind::Udwf => simple_udwf::simple_udwf().await?, + } - fn variants() -> Vec<&'static str> { - Self::ALL.iter().map(|x| x.as_ref()).collect() + Ok(()) } } @@ -110,24 +96,14 @@ async fn main() -> Result<()> { let usage = format!( "Usage: cargo run --example {} -- [{}]", ExampleKind::EXAMPLE_NAME, - ExampleKind::variants().join("|") + ExampleKind::VARIANTS.join("|") ); - let arg = std::env::args().nth(1).ok_or_else(|| { - eprintln!("{usage}"); - DataFusionError::Execution("Missing argument".to_string()) - })?; - - match arg.parse::()? { - ExampleKind::AdvUdaf => advanced_udaf::advanced_udaf().await?, - ExampleKind::AdvUdf => advanced_udf::advanced_udf().await?, - ExampleKind::AdvUdwf => advanced_udwf::advanced_udwf().await?, - ExampleKind::AsyncUdf => async_udf::async_udf().await?, - ExampleKind::Udaf => simple_udaf::simple_udaf().await?, - ExampleKind::Udf => simple_udf::simple_udf().await?, - ExampleKind::Udtf => simple_udtf::simple_udtf().await?, - ExampleKind::Udwf => simple_udwf::simple_udwf().await?, - } + let example: ExampleKind = std::env::args() + .nth(1) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .parse() + .map_err(|_| DataFusionError::Execution(format!("Unknown example. 
{usage}")))?; - Ok(()) + example.run().await } diff --git a/datafusion-examples/examples/udf/simple_udaf.rs b/datafusion-examples/examples/udf/simple_udaf.rs index e9f905e720997..42ea0054b759f 100644 --- a/datafusion-examples/examples/udf/simple_udaf.rs +++ b/datafusion-examples/examples/udf/simple_udaf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! /// In this example we will declare a single-type, single return type UDAF that computes the geometric mean. /// The geometric mean is described here: https://en.wikipedia.org/wiki/Geometric_mean use datafusion::arrow::{ diff --git a/datafusion-examples/examples/udf/simple_udf.rs b/datafusion-examples/examples/udf/simple_udf.rs index 7d4f3588e313f..e8d6c9c8173ac 100644 --- a/datafusion-examples/examples/udf/simple_udf.rs +++ b/datafusion-examples/examples/udf/simple_udf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::{ arrow::{ array::{ArrayRef, Float32Array, Float64Array}, diff --git a/datafusion-examples/examples/udf/simple_udtf.rs b/datafusion-examples/examples/udf/simple_udtf.rs index a03b157134aea..087b8ba73af5c 100644 --- a/datafusion-examples/examples/udf/simple_udtf.rs +++ b/datafusion-examples/examples/udf/simple_udtf.rs @@ -15,16 +15,18 @@ // specific language governing permissions and limitations // under the License. -use arrow::csv::reader::Format; +//! See `main.rs` for how to run it. + use arrow::csv::ReaderBuilder; +use arrow::csv::reader::Format; use async_trait::async_trait; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::catalog::Session; use datafusion::catalog::TableFunctionImpl; -use datafusion::common::{plan_err, ScalarValue}; -use datafusion::datasource::memory::MemorySourceConfig; +use datafusion::common::{ScalarValue, plan_err}; use datafusion::datasource::TableProvider; +use datafusion::datasource::memory::MemorySourceConfig; use datafusion::error::Result; use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::simplify::SimplifyContext; @@ -132,8 +134,7 @@ struct LocalCsvTableFunc {} impl TableFunctionImpl for LocalCsvTableFunc { fn call(&self, exprs: &[Expr]) -> Result> { - let Some(Expr::Literal(ScalarValue::Utf8(Some(ref path)), _)) = exprs.first() - else { + let Some(Expr::Literal(ScalarValue::Utf8(Some(path)), _)) = exprs.first() else { return plan_err!("read_csv requires at least one string argument"); }; diff --git a/datafusion-examples/examples/udf/simple_udwf.rs b/datafusion-examples/examples/udf/simple_udwf.rs index 2cf1df8d8ed86..1842d88b9ba29 100644 --- a/datafusion-examples/examples/udf/simple_udwf.rs +++ b/datafusion-examples/examples/udf/simple_udwf.rs @@ -15,29 +15,65 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; +//! See `main.rs` for how to run it. + +use std::{fs::File, io::Write, sync::Arc}; use arrow::{ array::{ArrayRef, AsArray, Float64Array}, datatypes::{DataType, Float64Type}, }; - use datafusion::common::ScalarValue; use datafusion::error::Result; use datafusion::logical_expr::{PartitionEvaluator, Volatility, WindowFrame}; use datafusion::prelude::*; +use tempfile::tempdir; // create local execution context with `cars.csv` registered as a table named `cars` async fn create_context() -> Result { // declare a new context. 
In spark API, this corresponds to a new spark SQL session let ctx = SessionContext::new(); - // declare a table in memory. In spark API, this corresponds to createDataFrame(...). - println!("pwd: {}", std::env::current_dir().unwrap().display()); - let csv_path = "../../datafusion/core/tests/data/cars.csv".to_string(); - let read_options = CsvReadOptions::default().has_header(true); + // content from file 'datafusion/core/tests/data/cars.csv' + let csv_data = r#"car,speed,time +red,20.0,1996-04-12T12:05:03.000000000 +red,20.3,1996-04-12T12:05:04.000000000 +red,21.4,1996-04-12T12:05:05.000000000 +red,21.5,1996-04-12T12:05:06.000000000 +red,19.0,1996-04-12T12:05:07.000000000 +red,18.0,1996-04-12T12:05:08.000000000 +red,17.0,1996-04-12T12:05:09.000000000 +red,7.0,1996-04-12T12:05:10.000000000 +red,7.1,1996-04-12T12:05:11.000000000 +red,7.2,1996-04-12T12:05:12.000000000 +red,3.0,1996-04-12T12:05:13.000000000 +red,1.0,1996-04-12T12:05:14.000000000 +red,0.0,1996-04-12T12:05:15.000000000 +green,10.0,1996-04-12T12:05:03.000000000 +green,10.3,1996-04-12T12:05:04.000000000 +green,10.4,1996-04-12T12:05:05.000000000 +green,10.5,1996-04-12T12:05:06.000000000 +green,11.0,1996-04-12T12:05:07.000000000 +green,12.0,1996-04-12T12:05:08.000000000 +green,14.0,1996-04-12T12:05:09.000000000 +green,15.0,1996-04-12T12:05:10.000000000 +green,15.1,1996-04-12T12:05:11.000000000 +green,15.2,1996-04-12T12:05:12.000000000 +green,8.0,1996-04-12T12:05:13.000000000 +green,2.0,1996-04-12T12:05:14.000000000 +"#; + let dir = tempdir()?; + let file_path = dir.path().join("cars.csv"); + { + let mut file = File::create(&file_path)?; + // write CSV data + file.write_all(csv_data.as_bytes())?; + } // scope closes the file + let file_path = file_path.to_str().unwrap(); + + ctx.register_csv("cars", file_path, CsvReadOptions::new()) + .await?; - ctx.register_csv("cars", &csv_path, read_options).await?; Ok(ctx) } diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index 4b802c0067e59..be1374b371485 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -46,7 +46,6 @@ futures = { workspace = true } itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } -tokio = { workspace = true } [dev-dependencies] datafusion-datasource-parquet = { workspace = true } diff --git a/datafusion/catalog-listing/src/config.rs b/datafusion/catalog-listing/src/config.rs index 3370d2ea75535..ca4d2abfcd737 100644 --- a/datafusion/catalog-listing/src/config.rs +++ b/datafusion/catalog-listing/src/config.rs @@ -19,9 +19,10 @@ use crate::options::ListingOptions; use arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion_catalog::Session; use datafusion_common::{config_err, internal_err}; +use datafusion_datasource::ListingTableUrl; use datafusion_datasource::file_compression_type::FileCompressionType; +#[expect(deprecated)] use datafusion_datasource::schema_adapter::SchemaAdapterFactory; -use datafusion_datasource::ListingTableUrl; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use std::str::FromStr; use std::sync::Arc; @@ -44,15 +45,12 @@ pub enum SchemaSource { /// # Schema Evolution Support /// /// This configuration supports schema evolution through the optional -/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need: +/// [`PhysicalExprAdapterFactory`]. 
You might want to override the default factory when you need: /// /// - **Type coercion requirements**: When you need custom logic for converting between /// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) /// - **Column mapping**: You need to map columns with a legacy name to a new name /// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. -/// -/// If not specified, a [`datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory`] -/// will be used, which handles basic schema compatibility cases. #[derive(Debug, Clone, Default)] pub struct ListingTableConfig { /// Paths on the `ObjectStore` for creating [`crate::ListingTable`]. @@ -68,8 +66,6 @@ pub struct ListingTableConfig { pub options: Option, /// Tracks the source of the schema information pub(crate) schema_source: SchemaSource, - /// Optional [`SchemaAdapterFactory`] for creating schema adapters - pub(crate) schema_adapter_factory: Option>, /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters pub(crate) expr_adapter_factory: Option>, } @@ -218,8 +214,7 @@ impl ListingTableConfig { file_schema, options: _, schema_source, - schema_adapter_factory, - expr_adapter_factory: physical_expr_adapter_factory, + expr_adapter_factory, } = self; let (schema, new_schema_source) = match file_schema { @@ -241,8 +236,7 @@ impl ListingTableConfig { file_schema: Some(schema), options: Some(options), schema_source: new_schema_source, - schema_adapter_factory, - expr_adapter_factory: physical_expr_adapter_factory, + expr_adapter_factory, }) } None => internal_err!("No `ListingOptions` set for inferring schema"), @@ -282,7 +276,6 @@ impl ListingTableConfig { file_schema: self.file_schema, options: Some(options), schema_source: self.schema_source, - schema_adapter_factory: self.schema_adapter_factory, expr_adapter_factory: self.expr_adapter_factory, }) } @@ -290,63 +283,11 @@ impl ListingTableConfig { } } - /// Set the [`SchemaAdapterFactory`] for the [`crate::ListingTable`] - /// - /// The schema adapter factory is used to create schema adapters that can - /// handle schema evolution and type conversions when reading files with - /// different schemas than the table schema. - /// - /// If not provided, a default schema adapter factory will be used. 
- /// - /// # Example: Custom Schema Adapter for Type Coercion - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; - /// # use datafusion_datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; - /// # use datafusion_datasource::ListingTableUrl; - /// # use datafusion_datasource_parquet::file_format::ParquetFormat; - /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; - /// # - /// # #[derive(Debug)] - /// # struct MySchemaAdapterFactory; - /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { - /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { - /// # unimplemented!() - /// # } - /// # } - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(listing_options) - /// .with_schema(table_schema) - /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); - /// ``` - pub fn with_schema_adapter_factory( - self, - schema_adapter_factory: Arc, - ) -> Self { - Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self - } - } - - /// Get the [`SchemaAdapterFactory`] for this configuration - pub fn schema_adapter_factory(&self) -> Option<&Arc> { - self.schema_adapter_factory.as_ref() - } - /// Set the [`PhysicalExprAdapterFactory`] for the [`crate::ListingTable`] /// /// The expression adapter factory is used to create physical expression adapters that can /// handle schema evolution and type conversions when evaluating expressions /// with different schemas than the table schema. - /// - /// If not provided, a default physical expression adapter factory will be used unless a custom - /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. - /// - /// See for details on this transition. pub fn with_expr_adapter_factory( self, expr_adapter_factory: Arc, @@ -356,4 +297,23 @@ impl ListingTableConfig { ..self } } + + /// Deprecated: Set the [`SchemaAdapterFactory`] for the [`crate::ListingTable`] + /// + /// `SchemaAdapterFactory` has been removed. Use [`Self::with_expr_adapter_factory`] + /// and `PhysicalExprAdapterFactory` instead. See `upgrading.md` for more details. + /// + /// This method is a no-op and returns `self` unchanged. + #[deprecated( + since = "52.0.0", + note = "SchemaAdapterFactory has been removed. Use with_expr_adapter_factory and PhysicalExprAdapterFactory instead. See upgrading.md for more details." 
+ )] + #[expect(deprecated)] + pub fn with_schema_adapter_factory( + self, + _schema_adapter_factory: Arc, + ) -> Self { + // No-op - just return self unchanged + self + } } diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 82cc36867939e..ea016015cebd3 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -21,25 +21,23 @@ use std::mem; use std::sync::Arc; use datafusion_catalog::Session; -use datafusion_common::internal_err; -use datafusion_common::{HashMap, Result, ScalarValue}; +use datafusion_common::{HashMap, Result, ScalarValue, assert_or_internal_err}; use datafusion_datasource::ListingTableUrl; use datafusion_datasource::PartitionedFile; -use datafusion_expr::{BinaryExpr, Operator}; +use datafusion_expr::{BinaryExpr, Operator, lit, utils}; use arrow::{ - array::{Array, ArrayRef, AsArray, StringBuilder}, - compute::{and, cast, prep_null_mask_filter}, - datatypes::{DataType, Field, Fields, Schema}, + array::AsArray, + datatypes::{DataType, Field}, record_batch::RecordBatch, }; use datafusion_expr::execution_props::ExecutionProps; use futures::stream::FuturesUnordered; -use futures::{stream::BoxStream, StreamExt, TryStreamExt}; +use futures::{StreamExt, TryStreamExt, stream::BoxStream}; use log::{debug, trace}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema, DataFusionError}; +use datafusion_common::{Column, DFSchema}; use datafusion_expr::{Expr, Volatility}; use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; @@ -53,7 +51,7 @@ use object_store::{ObjectMeta, ObjectStore}; pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool { let mut is_applicable = true; expr.apply(|expr| match expr { - Expr::Column(Column { ref name, .. }) => { + Expr::Column(Column { name, .. 
}) => { is_applicable &= col_names.contains(&name.as_str()); if is_applicable { Ok(TreeNodeRecursion::Jump) @@ -239,105 +237,6 @@ pub async fn list_partitions( Ok(out) } -async fn prune_partitions( - table_path: &ListingTableUrl, - partitions: Vec, - filters: &[Expr], - partition_cols: &[(String, DataType)], -) -> Result> { - if filters.is_empty() { - // prune partitions which don't contain the partition columns - return Ok(partitions - .into_iter() - .filter(|p| { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - !parse_partitions_for_path(table_path, &p.path, cols) - .unwrap_or_default() - .is_empty() - }) - .collect()); - } - - let mut builders: Vec<_> = (0..partition_cols.len()) - .map(|_| StringBuilder::with_capacity(partitions.len(), partitions.len() * 10)) - .collect(); - - for partition in &partitions { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - let parsed = parse_partitions_for_path(table_path, &partition.path, cols) - .unwrap_or_default(); - - let mut builders = builders.iter_mut(); - for (p, b) in parsed.iter().zip(&mut builders) { - b.append_value(p); - } - builders.for_each(|b| b.append_null()); - } - - let arrays = partition_cols - .iter() - .zip(builders) - .map(|((_, d), mut builder)| { - let array = builder.finish(); - cast(&array, d) - }) - .collect::>()?; - - let fields: Fields = partition_cols - .iter() - .map(|(n, d)| Field::new(n, d.clone(), true)) - .collect(); - let schema = Arc::new(Schema::new(fields)); - - let df_schema = DFSchema::from_unqualified_fields( - partition_cols - .iter() - .map(|(n, d)| Field::new(n, d.clone(), true)) - .collect(), - Default::default(), - )?; - - let batch = RecordBatch::try_new(schema, arrays)?; - - // TODO: Plumb this down - let props = ExecutionProps::new(); - - // Applies `filter` to `batch` returning `None` on error - let do_filter = |filter| -> Result { - let expr = create_physical_expr(filter, &df_schema, &props)?; - expr.evaluate(&batch)?.into_array(partitions.len()) - }; - - //.Compute the conjunction of the filters - let mask = filters - .iter() - .map(|f| do_filter(f).map(|a| a.as_boolean().clone())) - .reduce(|a, b| Ok(and(&a?, &b?)?)); - - let mask = match mask { - Some(Ok(mask)) => mask, - Some(Err(err)) => return Err(err), - None => return Ok(partitions), - }; - - // Don't retain partitions that evaluated to null - let prepared = match mask.null_count() { - 0 => mask, - _ => prep_null_mask_filter(&mask), - }; - - // Sanity check - assert_eq!(prepared.len(), partitions.len()); - - let filtered = partitions - .into_iter() - .zip(prepared.values()) - .filter_map(|(p, f)| f.then_some(p)) - .collect(); - - Ok(filtered) -} - #[derive(Debug)] enum PartitionValue { Single(String), @@ -348,16 +247,11 @@ fn populate_partition_values<'a>( partition_values: &mut HashMap<&'a str, PartitionValue>, filter: &'a Expr, ) { - if let Expr::BinaryExpr(BinaryExpr { - ref left, - op, - ref right, - }) = filter - { + if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = filter { match op { Operator::Eq => match (left.as_ref(), right.as_ref()) { - (Expr::Column(Column { ref name, .. }), Expr::Literal(val, _)) - | (Expr::Literal(val, _), Expr::Column(Column { ref name, .. })) => { + (Expr::Column(Column { name, .. }), Expr::Literal(val, _)) + | (Expr::Literal(val, _), Expr::Column(Column { name, .. 
})) => { if partition_values .insert(name, PartitionValue::Single(val.to_string())) .is_some() @@ -412,6 +306,62 @@ pub fn evaluate_partition_prefix<'a>( } } +fn filter_partitions( + pf: PartitionedFile, + filters: &[Expr], + df_schema: &DFSchema, +) -> Result> { + if pf.partition_values.is_empty() && !filters.is_empty() { + return Ok(None); + } else if filters.is_empty() { + return Ok(Some(pf)); + } + + let arrays = pf + .partition_values + .iter() + .map(|v| v.to_array()) + .collect::>()?; + + let batch = RecordBatch::try_new(Arc::clone(df_schema.inner()), arrays)?; + + let filter = utils::conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true)); + let props = ExecutionProps::new(); + let expr = create_physical_expr(&filter, df_schema, &props)?; + + // Since we're only operating on a single file, our batch and resulting "array" holds only one + // value indicating if the input file matches the provided filters + let matches = expr.evaluate(&batch)?.into_array(1)?; + if matches.as_boolean().value(0) { + return Ok(Some(pf)); + } + + Ok(None) +} + +fn try_into_partitioned_file( + object_meta: ObjectMeta, + partition_cols: &[(String, DataType)], + table_path: &ListingTableUrl, +) -> Result { + let cols = partition_cols.iter().map(|(name, _)| name.as_str()); + let parsed = parse_partitions_for_path(table_path, &object_meta.location, cols); + + let partition_values = parsed + .into_iter() + .flatten() + .zip(partition_cols) + .map(|(parsed, (_, datatype))| { + ScalarValue::try_from_string(parsed.to_string(), datatype) + }) + .collect::>>()?; + + let mut pf: PartitionedFile = object_meta.into(); + pf.partition_values = partition_values; + + Ok(pf) +} + /// Discover the partitions on the given path and prune out files /// that belong to irrelevant partitions using `filters` expressions. /// `filters` should only contain expressions that can be evaluated @@ -424,80 +374,46 @@ pub async fn pruned_partition_list<'a>( file_extension: &'a str, partition_cols: &'a [(String, DataType)], ) -> Result>> { - // if no partition col => simply list all the files - if partition_cols.is_empty() { - if !filters.is_empty() { - return internal_err!( - "Got partition filters for unpartitioned table {}", - table_path - ); - } - return Ok(Box::pin( - table_path - .list_all_files(ctx, store, file_extension) - .await? - .try_filter(|object_meta| futures::future::ready(object_meta.size > 0)) - .map_ok(|object_meta| object_meta.into()), - )); - } - - let partition_prefix = evaluate_partition_prefix(partition_cols, filters); - - let partitions = - list_partitions(store, table_path, partition_cols.len(), partition_prefix) - .await?; - debug!("Listed {} partitions", partitions.len()); + let prefix = if !partition_cols.is_empty() { + evaluate_partition_prefix(partition_cols, filters) + } else { + None + }; - let pruned = - prune_partitions(table_path, partitions, filters, partition_cols).await?; + let objects = table_path + .list_prefixed_files(ctx, store, prefix, file_extension) + .await? 
+ .try_filter(|object_meta| futures::future::ready(object_meta.size > 0)); - debug!("Pruning yielded {} partitions", pruned.len()); + if partition_cols.is_empty() { + assert_or_internal_err!( + filters.is_empty(), + "Got partition filters for unpartitioned table {}", + table_path + ); - let stream = futures::stream::iter(pruned) - .map(move |partition: Partition| async move { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - let parsed = parse_partitions_for_path(table_path, &partition.path, cols); + // if no partition col => simply list all the files + Ok(objects.map_ok(|object_meta| object_meta.into()).boxed()) + } else { + let df_schema = DFSchema::from_unqualified_fields( + partition_cols + .iter() + .map(|(n, d)| Field::new(n, d.clone(), true)) + .collect(), + Default::default(), + )?; - let partition_values = parsed - .into_iter() - .flatten() - .zip(partition_cols) - .map(|(parsed, (_, datatype))| { - ScalarValue::try_from_string(parsed.to_string(), datatype) - }) - .collect::>>()?; - - let files = match partition.files { - Some(files) => files, - None => { - trace!("Recursively listing partition {}", partition.path); - store.list(Some(&partition.path)).try_collect().await? - } - }; - let files = files.into_iter().filter(move |o| { - let extension_match = o.location.as_ref().ends_with(file_extension); - // here need to scan subdirectories(`listing_table_ignore_subdirectory` = false) - let glob_match = table_path.contains(&o.location, false); - extension_match && glob_match - }); - - let stream = futures::stream::iter(files.map(move |object_meta| { - Ok(PartitionedFile { - object_meta, - partition_values: partition_values.clone(), - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }) - })); - - Ok::<_, DataFusionError>(stream) - }) - .buffer_unordered(CONCURRENCY_LIMIT) - .try_flatten() - .boxed(); - Ok(stream) + Ok(objects + .map_ok(|object_meta| { + try_into_partitioned_file(object_meta, partition_cols, table_path) + }) + .try_filter_map(move |pf| { + futures::future::ready( + pf.and_then(|pf| filter_partitions(pf, filters, &df_schema)), + ) + }) + .boxed()) + } } /// Extract the partition values for the given `file_path` (in the given `table_path`) @@ -541,22 +457,11 @@ pub fn describe_partition(partition: &Partition) -> (&str, usize, Vec<&str>) { #[cfg(test)] mod tests { - use async_trait::async_trait; - use datafusion_common::config::TableOptions; use datafusion_datasource::file_groups::FileGroup; - use datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::RuntimeEnv; - use futures::FutureExt; - use object_store::memory::InMemory; - use std::any::Any; use std::ops::Not; use super::*; - use datafusion_expr::{ - case, col, lit, AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF, - }; - use datafusion_physical_expr_common::physical_expr::PhysicalExpr; - use datafusion_physical_plan::ExecutionPlan; + use datafusion_expr::{Expr, case, col, lit}; #[test] fn test_split_files() { @@ -599,209 +504,6 @@ mod tests { assert_eq!(0, chunks.len()); } - #[tokio::test] - async fn test_pruned_partition_list_empty() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/mypartition=val1/notparquetfile", 100), - ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), - ("tablepath/file.parquet", 100), - ("tablepath/notapartition/file.parquet", 100), - ("tablepath/notmypartition=val1/file.parquet", 100), - ]); - let filter = Expr::eq(col("mypartition"), lit("val1")); - let pruned = pruned_partition_list( 
- state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter], - ".parquet", - &[(String::from("mypartition"), DataType::Utf8)], - ) - .await - .expect("partition pruning failed") - .collect::>() - .await; - - assert_eq!(pruned.len(), 0); - } - - #[tokio::test] - async fn test_pruned_partition_list() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/mypartition=val1/file.parquet", 100), - ("tablepath/mypartition=val2/file.parquet", 100), - ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), - ("tablepath/mypartition=val1/other=val3/file.parquet", 100), - ("tablepath/notapartition/file.parquet", 100), - ("tablepath/notmypartition=val1/file.parquet", 100), - ]); - let filter = Expr::eq(col("mypartition"), lit("val1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter], - ".parquet", - &[(String::from("mypartition"), DataType::Utf8)], - ) - .await - .expect("partition pruning failed") - .try_collect::>() - .await - .unwrap(); - - assert_eq!(pruned.len(), 2); - let f1 = &pruned[0]; - assert_eq!( - f1.object_meta.location.as_ref(), - "tablepath/mypartition=val1/file.parquet" - ); - assert_eq!(&f1.partition_values, &[ScalarValue::from("val1")]); - let f2 = &pruned[1]; - assert_eq!( - f2.object_meta.location.as_ref(), - "tablepath/mypartition=val1/other=val3/file.parquet" - ); - assert_eq!(f2.partition_values, &[ScalarValue::from("val1"),]); - } - - #[tokio::test] - async fn test_pruned_partition_list_multi() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/part1=p1v1/file.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), - ]); - let filter1 = Expr::eq(col("part1"), lit("p1v2")); - let filter2 = Expr::eq(col("part2"), lit("p2v1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter1, filter2], - ".parquet", - &[ - (String::from("part1"), DataType::Utf8), - (String::from("part2"), DataType::Utf8), - ], - ) - .await - .expect("partition pruning failed") - .try_collect::>() - .await - .unwrap(); - - assert_eq!(pruned.len(), 2); - let f1 = &pruned[0]; - assert_eq!( - f1.object_meta.location.as_ref(), - "tablepath/part1=p1v2/part2=p2v1/file1.parquet" - ); - assert_eq!( - &f1.partition_values, - &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1"),] - ); - let f2 = &pruned[1]; - assert_eq!( - f2.object_meta.location.as_ref(), - "tablepath/part1=p1v2/part2=p2v1/file2.parquet" - ); - assert_eq!( - &f2.partition_values, - &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1")] - ); - } - - #[tokio::test] - async fn test_list_partition() { - let (store, _) = make_test_store_and_state(&[ - ("tablepath/part1=p1v1/file.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v3/part2=p2v1/file3.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/file4.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/empty.parquet", 0), - ]); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 0, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - 
.map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec![]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ] - ); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 1, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v2/part2=p2v1", 2, vec![]), - ("tablepath/part1=p1v2/part2=p2v2", 2, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ("tablepath/part1=p1v3/part2=p2v1", 2, vec![]), - ] - ); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 2, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ( - "tablepath/part1=p1v2/part2=p2v1", - 2, - vec!["file1.parquet", "file2.parquet"] - ), - ("tablepath/part1=p1v2/part2=p2v2", 2, vec!["file4.parquet"]), - ("tablepath/part1=p1v3/part2=p2v1", 2, vec!["file3.parquet"]), - ] - ); - } - #[test] fn test_parse_partitions_for_path() { assert_eq!( @@ -1016,86 +718,4 @@ mod tests { Some(Path::from("a=1970-01-05")), ); } - - pub fn make_test_store_and_state( - files: &[(&str, u64)], - ) -> (Arc, Arc) { - let memory = InMemory::new(); - - for (name, size) in files { - memory - .put(&Path::from(*name), vec![0; *size as usize].into()) - .now_or_never() - .unwrap() - .unwrap(); - } - - (Arc::new(memory), Arc::new(MockSession {})) - } - - struct MockSession {} - - #[async_trait] - impl Session for MockSession { - fn session_id(&self) -> &str { - unimplemented!() - } - - fn config(&self) -> &SessionConfig { - unimplemented!() - } - - async fn create_physical_plan( - &self, - _logical_plan: &LogicalPlan, - ) -> Result> { - unimplemented!() - } - - fn create_physical_expr( - &self, - _expr: Expr, - _df_schema: &DFSchema, - ) -> Result> { - unimplemented!() - } - - fn scalar_functions(&self) -> &std::collections::HashMap> { - unimplemented!() - } - - fn aggregate_functions( - &self, - ) -> &std::collections::HashMap> { - unimplemented!() - } - - fn window_functions(&self) -> &std::collections::HashMap> { - unimplemented!() - } - - fn runtime_env(&self) -> &Arc { - unimplemented!() - } - - fn execution_props(&self) -> &ExecutionProps { - unimplemented!() - } - - fn as_any(&self) -> &dyn Any { - unimplemented!() - } - - fn table_options(&self) -> &TableOptions { - unimplemented!() - } - - fn table_options_mut(&mut self) -> &mut TableOptions { - unimplemented!() - } - - fn task_ctx(&self) -> Arc { - unimplemented!() - } - } } diff --git a/datafusion/catalog-listing/src/mod.rs b/datafusion/catalog-listing/src/mod.rs index 90d04b46b8067..28bd880ea01fb 100644 --- a/datafusion/catalog-listing/src/mod.rs +++ b/datafusion/catalog-listing/src/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#![deny(clippy::allow_attributes)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" @@ -31,4 +33,4 @@ mod table; pub use config::{ListingTableConfig, SchemaSource}; pub use options::ListingOptions; -pub use table::ListingTable; +pub use table::{ListFilesResult, ListingTable}; diff --git a/datafusion/catalog-listing/src/options.rs b/datafusion/catalog-listing/src/options.rs index 7da8005f90ec2..146f98d62335e 100644 --- a/datafusion/catalog-listing/src/options.rs +++ b/datafusion/catalog-listing/src/options.rs @@ -18,12 +18,12 @@ use arrow::datatypes::{DataType, SchemaRef}; use datafusion_catalog::Session; use datafusion_common::plan_err; -use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::ListingTableUrl; +use datafusion_datasource::file_format::FileFormat; use datafusion_execution::config::SessionConfig; use datafusion_expr::SortExpr; use futures::StreamExt; -use futures::{future, TryStreamExt}; +use futures::{TryStreamExt, future}; use itertools::Itertools; use std::sync::Arc; diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 95f9523d4401c..9fb2dd2dce29c 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -23,18 +23,16 @@ use async_trait::async_trait; use datafusion_catalog::{ScanArgs, ScanResult, Session, TableProvider}; use datafusion_common::stats::Precision; use datafusion_common::{ - internal_datafusion_err, plan_err, project_schema, Constraints, DataFusionError, - SchemaExt, Statistics, + Constraints, SchemaExt, Statistics, internal_datafusion_err, plan_err, project_schema, }; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; use datafusion_datasource::file_sink_config::FileSinkConfig; -use datafusion_datasource::schema_adapter::{ - DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, -}; +#[expect(deprecated)] +use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use datafusion_datasource::{ - compute_all_files_statistics, ListingTableUrl, PartitionedFile, + ListingTableUrl, PartitionedFile, TableSchema, compute_all_files_statistics, }; use datafusion_execution::cache::cache_manager::FileStatisticsCache; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; @@ -44,14 +42,25 @@ use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion_physical_expr::create_lex_ordering; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::ExecutionPlan; -use futures::{future, stream, Stream, StreamExt, TryStreamExt}; +use datafusion_physical_plan::empty::EmptyExec; +use futures::{Stream, StreamExt, TryStreamExt, future, stream}; use object_store::ObjectStore; use std::any::Any; use std::collections::HashMap; use std::sync::Arc; +/// Result of a file listing operation from [`ListingTable::list_files_for_scan`]. 
+#[derive(Debug)] +pub struct ListFilesResult { + /// File groups organized by the partitioning strategy. + pub file_groups: Vec, + /// Aggregated statistics for all files. + pub statistics: Statistics, + /// Whether files are grouped by partition values (enables Hash partitioning). + pub grouped_by_partition: bool, +} + /// Built in [`TableProvider`] that reads data from one or more files as a single table. /// /// The files are read using an [`ObjectStore`] instance, for example from @@ -178,13 +187,11 @@ pub struct ListingTable { /// The SQL definition for this table, if any definition: Option, /// Cache for collected file statistics - collected_statistics: FileStatisticsCache, + collected_statistics: Arc, /// Constraints applied to this table constraints: Constraints, /// Column default expressions for columns that are not physically present in the data files column_defaults: HashMap, - /// Optional [`SchemaAdapterFactory`] for creating schema adapters - schema_adapter_factory: Option>, /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters expr_adapter_factory: Option>, } @@ -227,7 +234,6 @@ impl ListingTable { collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), constraints: Constraints::default(), column_defaults: HashMap::new(), - schema_adapter_factory: config.schema_adapter_factory, expr_adapter_factory: config.expr_adapter_factory, }; @@ -255,7 +261,7 @@ impl ListingTable { /// multiple times in the same session. /// /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. - pub fn with_cache(mut self, cache: Option) -> Self { + pub fn with_cache(mut self, cache: Option>) -> Self { self.collected_statistics = cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); self @@ -282,71 +288,52 @@ impl ListingTable { self.schema_source } - /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] + /// Deprecated: Set the [`SchemaAdapterFactory`] for this [`ListingTable`] /// - /// The schema adapter factory is used to create schema adapters that can - /// handle schema evolution and type conversions when reading files with - /// different schemas than the table schema. + /// `SchemaAdapterFactory` has been removed. Use [`ListingTableConfig::with_expr_adapter_factory`] + /// and `PhysicalExprAdapterFactory` instead. See `upgrading.md` for more details. /// - /// # Example: Adding Schema Evolution Support - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion_catalog_listing::{ListingTable, ListingTableConfig, ListingOptions}; - /// # use datafusion_datasource::ListingTableUrl; - /// # use datafusion_datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; - /// # use datafusion_datasource_parquet::file_format::ParquetFormat; - /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; - /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); - /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); - /// # let table = ListingTable::try_new(config).unwrap(); - /// let table_with_evolution = table - /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); - /// ``` - /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. 
+ /// This method is a no-op and returns `self` unchanged. + #[deprecated( + since = "52.0.0", + note = "SchemaAdapterFactory has been removed. Use ListingTableConfig::with_expr_adapter_factory and PhysicalExprAdapterFactory instead. See upgrading.md for more details." + )] + #[expect(deprecated)] pub fn with_schema_adapter_factory( self, - schema_adapter_factory: Arc, + _schema_adapter_factory: Arc, ) -> Self { - Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self - } - } - - /// Get the [`SchemaAdapterFactory`] for this table - pub fn schema_adapter_factory(&self) -> Option<&Arc> { - self.schema_adapter_factory.as_ref() + // No-op - just return self unchanged + self } - /// Creates a schema adapter for mapping between file and table schemas + /// Deprecated: Returns the [`SchemaAdapterFactory`] used by this [`ListingTable`]. /// - /// Uses the configured schema adapter factory if available, otherwise falls back - /// to the default implementation. - fn create_schema_adapter(&self) -> Box { - let table_schema = self.schema(); - match &self.schema_adapter_factory { - Some(factory) => { - factory.create_with_projected_schema(Arc::clone(&table_schema)) - } - None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), - } + /// `SchemaAdapterFactory` has been removed. Use `PhysicalExprAdapterFactory` instead. + /// See `upgrading.md` for more details. + /// + /// Always returns `None`. + #[deprecated( + since = "52.0.0", + note = "SchemaAdapterFactory has been removed. Use PhysicalExprAdapterFactory instead. See upgrading.md for more details." + )] + #[expect(deprecated)] + pub fn schema_adapter_factory(&self) -> Option> { + None } - /// Creates a file source and applies schema adapter factory if available - fn create_file_source_with_schema_adapter( - &self, - ) -> datafusion_common::Result> { - let mut source = self.options.format.file_source(); - // Apply schema adapter to source if available - // - // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. - // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. - if let Some(factory) = &self.schema_adapter_factory { - source = source.with_schema_adapter_factory(Arc::clone(factory))?; - } - Ok(source) + /// Creates a file source for this table + fn create_file_source(&self) -> Arc { + let table_schema = TableSchema::new( + Arc::clone(&self.file_schema), + self.options + .table_partition_cols + .iter() + .map(|(col, field)| Arc::new(Field::new(col, field.clone(), false))) + .collect(), + ); + + self.options.format.file_source(table_schema) } /// If file_sort_order is specified, creates the appropriate physical expressions @@ -418,7 +405,7 @@ impl TableProvider for ListingTable { .options .table_partition_cols .iter() - .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) + .map(|col| Ok(Arc::new(self.table_schema.field_with_name(&col.0)?.clone()))) .collect::>>()?; let table_partition_col_names = table_partition_cols @@ -437,7 +424,11 @@ impl TableProvider for ListingTable { // at the same time. This is because the limit should be applied after the filters are applied. 
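Since `with_schema_adapter_factory` is now a deprecated no-op, callers that previously attached a `SchemaAdapterFactory` need to move to the expression-adapter path named in the deprecation note. A hedged sketch of the new wiring, reusing the builder calls from the removed doc example; `MyExprAdapterFactory` is a hypothetical user type implementing `PhysicalExprAdapterFactory`, and the exact signature of `with_expr_adapter_factory` is assumed from the note above:

```rust
// Sketch only: schema-evolution handling moves from the removed
// SchemaAdapterFactory hook to a PhysicalExprAdapterFactory supplied
// through ListingTableConfig.
let table_path = ListingTableUrl::parse("file:///path/to/data")?;
let options = ListingOptions::new(Arc::new(ParquetFormat::default()));
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
let config = ListingTableConfig::new(table_path)
    .with_listing_options(options)
    .with_schema(schema)
    // replaces the deprecated ListingTable::with_schema_adapter_factory(...)
    .with_expr_adapter_factory(Arc::new(MyExprAdapterFactory::default()));
let table = ListingTable::try_new(config)?;
```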
let statistic_file_limit = if filters.is_empty() { limit } else { None }; - let (mut partitioned_file_lists, statistics) = self + let ListFilesResult { + file_groups: mut partitioned_file_lists, + statistics, + grouped_by_partition: partitioned_by_file_group, + } = self .list_files_for_scan(state, &partition_filters, statistic_file_limit) .await?; @@ -469,7 +460,9 @@ impl TableProvider for ListingTable { if new_groups.len() <= self.options.target_partitions { partitioned_file_lists = new_groups; } else { - log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") + log::debug!( + "attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered" + ) } } None => {} // no ordering required @@ -483,7 +476,7 @@ impl TableProvider for ListingTable { ))))); }; - let file_source = self.create_file_source_with_schema_adapter()?; + let file_source = self.create_file_source(); // create the execution plan let plan = self @@ -491,20 +484,16 @@ impl TableProvider for ListingTable { .format .create_physical_plan( state, - FileScanConfigBuilder::new( - object_store_url, - Arc::clone(&self.file_schema), - file_source, - ) - .with_file_groups(partitioned_file_lists) - .with_constraints(self.constraints.clone()) - .with_statistics(statistics) - .with_projection_indices(projection) - .with_limit(limit) - .with_output_ordering(output_ordering) - .with_table_partition_cols(table_partition_cols) - .with_expr_adapter(self.expr_adapter_factory.clone()) - .build(), + FileScanConfigBuilder::new(object_store_url, file_source) + .with_file_groups(partitioned_file_lists) + .with_constraints(self.constraints.clone()) + .with_statistics(statistics) + .with_projection_indices(projection)? + .with_limit(limit) + .with_output_ordering(output_ordering) + .with_expr_adapter(self.expr_adapter_factory.clone()) + .with_partitioned_by_file_group(partitioned_by_file_group) + .build(), ) .await?; @@ -574,6 +563,11 @@ impl TableProvider for ListingTable { let keep_partition_by_columns = state.config_options().execution.keep_partition_by_columns; + // Invalidate cache entries for this table if they exist + if let Some(lfc) = state.runtime_env().cache_manager.get_list_files_cache() { + let _ = lfc.remove(table_path.prefix()); + } + // Sink related option, apart from format let config = FileSinkConfig { original_url: String::default(), @@ -611,11 +605,15 @@ impl ListingTable { ctx: &'a dyn Session, filters: &'a [Expr], limit: Option, - ) -> datafusion_common::Result<(Vec, Statistics)> { + ) -> datafusion_common::Result { let store = if let Some(url) = self.table_paths.first() { ctx.runtime_env().object_store(url)? 
} else { - return Ok((vec![], Statistics::new_unknown(&self.file_schema))); + return Ok(ListFilesResult { + file_groups: vec![], + statistics: Statistics::new_unknown(&self.file_schema), + grouped_by_partition: false, + }); }; // list files (with partitions) let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { @@ -649,27 +647,51 @@ impl ListingTable { let (file_group, inexact_stats) = get_files_with_limit(files, limit, self.options.collect_stat).await?; - let file_groups = file_group.split_files(self.options.target_partitions); - let (mut file_groups, mut stats) = compute_all_files_statistics( + // Threshold: 0 = disabled, N > 0 = enabled when distinct_keys >= N + // + // When enabled, files are grouped by their Hive partition column values, allowing + // FileScanConfig to declare Hash partitioning. This enables the optimizer to skip + // hash repartitioning for aggregates and joins on partition columns. + let threshold = ctx.config_options().optimizer.preserve_file_partitions; + + let (file_groups, grouped_by_partition) = if threshold > 0 + && !self.options.table_partition_cols.is_empty() + { + let grouped = + file_group.group_by_partition_values(self.options.target_partitions); + if grouped.len() >= threshold { + (grouped, true) + } else { + let all_files: Vec<_> = + grouped.into_iter().flat_map(|g| g.into_inner()).collect(); + ( + FileGroup::new(all_files).split_files(self.options.target_partitions), + false, + ) + } + } else { + ( + file_group.split_files(self.options.target_partitions), + false, + ) + }; + + let (file_groups, stats) = compute_all_files_statistics( file_groups, self.schema(), self.options.collect_stat, inexact_stats, )?; - let schema_adapter = self.create_schema_adapter(); - let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; - - stats.column_statistics = - schema_mapper.map_column_statistics(&stats.column_statistics)?; - file_groups.iter_mut().try_for_each(|file_group| { - if let Some(stat) = file_group.statistics_mut() { - stat.column_statistics = - schema_mapper.map_column_statistics(&stat.column_statistics)?; - } - Ok::<_, DataFusionError>(()) - })?; - Ok((file_groups, stats)) + // Note: Statistics already include both file columns and partition columns. + // PartitionedFile::with_statistics automatically appends exact partition column + // statistics (min=max=partition_value, null_count=0, distinct_count=1) computed + // from partition_values. + Ok(ListFilesResult { + file_groups, + statistics: stats, + grouped_by_partition, + }) } /// Collects statistics for a given partitioned file. 
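To make the new control flow in `list_files_for_scan` easier to follow: listing now returns a `ListFilesResult` whose `grouped_by_partition` flag records whether files were grouped by their Hive partition values, and that decision is gated by `optimizer.preserve_file_partitions`. A self-contained sketch of just the threshold gate (names are illustrative; the real code operates on `FileGroup`s):

```rust
/// Illustrative only: mirrors the `preserve_file_partitions` gate above.
/// `threshold` is the config value, `distinct_partitions` the number of
/// groups produced by `group_by_partition_values`.
fn keep_partition_grouping(
    threshold: usize,
    has_partition_cols: bool,
    distinct_partitions: usize,
) -> bool {
    // 0 disables the feature entirely; otherwise require at least `threshold`
    // distinct partition values before declaring Hash partitioning.
    threshold > 0 && has_partition_cols && distinct_partitions >= threshold
}

fn main() {
    assert!(!keep_partition_grouping(0, true, 10)); // disabled
    assert!(keep_partition_grouping(1, true, 10));  // always grouped once enabled
    assert!(!keep_partition_grouping(4, true, 3));  // too few partitions:
                                                    // fall back to split_files
    assert!(!keep_partition_grouping(4, false, 8)); // no Hive partition columns
}
```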
@@ -756,28 +778,25 @@ async fn get_files_with_limit( let file = file_result?; // Update file statistics regardless of state - if collect_stats { - if let Some(file_stats) = &file.statistics { - num_rows = if file_group.is_empty() { - // For the first file, just take its row count - file_stats.num_rows - } else { - // For subsequent files, accumulate the counts - num_rows.add(&file_stats.num_rows) - }; - } + if collect_stats && let Some(file_stats) = &file.statistics { + num_rows = if file_group.is_empty() { + // For the first file, just take its row count + file_stats.num_rows + } else { + // For subsequent files, accumulate the counts + num_rows.add(&file_stats.num_rows) + }; } // Always add the file to our group file_group.push(file); // Check if we've hit the limit (if one was specified) - if let Some(limit) = limit { - if let Precision::Exact(row_count) = num_rows { - if row_count > limit { - state = ProcessingState::ReachedLimit; - } - } + if let Some(limit) = limit + && let Precision::Exact(row_count) = num_rows + && row_count > limit + { + state = ProcessingState::ReachedLimit; } } // If we still have files in the stream, it means that the limit kicked diff --git a/datafusion/catalog/src/async.rs b/datafusion/catalog/src/async.rs index 1c830c976d8b8..1b8039d828fdb 100644 --- a/datafusion/catalog/src/async.rs +++ b/datafusion/catalog/src/async.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use async_trait::async_trait; -use datafusion_common::{error::Result, not_impl_err, HashMap, TableReference}; +use datafusion_common::{HashMap, TableReference, error::Result, not_impl_err}; use datafusion_execution::config::SessionConfig; use crate::{CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider}; @@ -60,7 +60,9 @@ impl SchemaProvider for ResolvedSchemaProvider { } fn deregister_table(&self, name: &str) -> Result>> { - not_impl_err!("Attempt to deregister table '{name}' with ResolvedSchemaProvider which is not supported") + not_impl_err!( + "Attempt to deregister table '{name}' with ResolvedSchemaProvider which is not supported" + ) } fn table_exist(&self, name: &str) -> bool { @@ -193,7 +195,7 @@ impl CatalogProviderList for ResolvedCatalogProviderList { /// /// See the [remote_catalog.rs] for an end to end example /// -/// [remote_catalog.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/remote_catalog.rs +/// [remote_catalog.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/data_io/remote_catalog.rs #[async_trait] pub trait AsyncSchemaProvider: Send + Sync { /// Lookup a table in the schema provider @@ -425,14 +427,14 @@ mod tests { use std::{ any::Any, sync::{ - atomic::{AtomicU32, Ordering}, Arc, + atomic::{AtomicU32, Ordering}, }, }; use arrow::datatypes::SchemaRef; use async_trait::async_trait; - use datafusion_common::{error::Result, Statistics, TableReference}; + use datafusion_common::{Statistics, TableReference, error::Result}; use datafusion_execution::config::SessionConfig; use datafusion_expr::{Expr, TableType}; use datafusion_physical_plan::ExecutionPlan; diff --git a/datafusion/catalog/src/catalog.rs b/datafusion/catalog/src/catalog.rs index 71b9eccf9d657..bb9e89eba2fef 100644 --- a/datafusion/catalog/src/catalog.rs +++ b/datafusion/catalog/src/catalog.rs @@ -20,8 +20,8 @@ use std::fmt::Debug; use std::sync::Arc; pub use crate::schema::SchemaProvider; -use datafusion_common::not_impl_err; use datafusion_common::Result; +use datafusion_common::not_impl_err; /// Represents a catalog, comprising a number of 
named schemas. /// @@ -61,7 +61,7 @@ use datafusion_common::Result; /// schemas and tables exist. /// /// [Delta Lake]: https://delta.io/ -/// [`remote_catalog`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/remote_catalog.rs +/// [`remote_catalog`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/data_io/remote_catalog.rs /// /// The [`CatalogProvider`] can support this use case, but it takes some care. /// The planning APIs in DataFusion are not `async` and thus network IO can not @@ -100,7 +100,7 @@ use datafusion_common::Result; /// /// [`datafusion-cli`]: https://datafusion.apache.org/user-guide/cli/index.html /// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 -/// [`catalog.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/catalog.rs +/// [`catalog.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/data_io/catalog.rs /// [delta-rs]: https://github.com/delta-io/delta-rs /// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123 /// diff --git a/datafusion/catalog/src/cte_worktable.rs b/datafusion/catalog/src/cte_worktable.rs index d6b2a453118c9..9565dcc60141e 100644 --- a/datafusion/catalog/src/cte_worktable.rs +++ b/datafusion/catalog/src/cte_worktable.rs @@ -17,20 +17,18 @@ //! CteWorkTable implementation used for recursive queries +use std::any::Any; +use std::borrow::Cow; use std::sync::Arc; -use std::{any::Any, borrow::Cow}; -use crate::Session; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion_physical_plan::work_table::WorkTableExec; - -use datafusion_physical_plan::ExecutionPlan; - use datafusion_common::error::Result; use datafusion_expr::{Expr, LogicalPlan, TableProviderFilterPushDown, TableType}; +use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::work_table::WorkTableExec; -use crate::TableProvider; +use crate::{ScanArgs, ScanResult, Session, TableProvider}; /// The temporary working table where the previous iteration of a recursive query is stored /// Naming is based on PostgreSQL's implementation. 
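A recurring mechanical change in this diff (see `get_files_with_limit` above and the `information_schema` hunks below) is collapsing nested `if`/`if let` blocks into let-chains. A small self-contained illustration of the before/after shape, assuming a toolchain where let-chains are stable (Rust 2024 edition):

```rust
fn over_limit(limit: Option<usize>, row_count: Option<usize>) -> bool {
    // Before: nested conditionals, as the old code was written
    // if let Some(limit) = limit {
    //     if let Some(rows) = row_count {
    //         if rows > limit {
    //             return true;
    //         }
    //     }
    // }

    // After: a single let-chain, matching the style used in the diff
    if let Some(limit) = limit
        && let Some(rows) = row_count
        && rows > limit
    {
        return true;
    }
    false
}

fn main() {
    assert!(over_limit(Some(10), Some(11)));
    assert!(!over_limit(None, Some(11)));
}
```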
@@ -85,16 +83,28 @@ impl TableProvider for CteWorkTable { async fn scan( &self, - _state: &dyn Session, - _projection: Option<&Vec>, - _filters: &[Expr], - _limit: Option, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, ) -> Result> { - // TODO: pushdown filters and limits - Ok(Arc::new(WorkTableExec::new( + let options = ScanArgs::default() + .with_projection(projection.map(|p| p.as_slice())) + .with_filters(Some(filters)) + .with_limit(limit); + Ok(self.scan_with_args(state, options).await?.into_inner()) + } + + async fn scan_with_args<'a>( + &self, + _state: &dyn Session, + args: ScanArgs<'a>, + ) -> Result { + Ok(ScanResult::new(Arc::new(WorkTableExec::new( self.name.clone(), Arc::clone(&self.table_schema), - ))) + args.projection().map(|p| p.to_vec()), + )?))) } fn supports_filters_pushdown( diff --git a/datafusion/catalog/src/default_table_source.rs b/datafusion/catalog/src/default_table_source.rs index 11963c06c88f5..fb6531ba0b2ee 100644 --- a/datafusion/catalog/src/default_table_source.rs +++ b/datafusion/catalog/src/default_table_source.rs @@ -23,7 +23,7 @@ use std::{any::Any, borrow::Cow}; use crate::TableProvider; use arrow::datatypes::SchemaRef; -use datafusion_common::{internal_err, Constraints}; +use datafusion_common::{Constraints, internal_err}; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource, TableType}; /// Implements [`TableSource`] for a [`TableProvider`] diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index d733551f44051..52bfeca3d4282 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -28,16 +28,17 @@ use arrow::{ record_batch::RecordBatch, }; use async_trait::async_trait; +use datafusion_common::DataFusionError; use datafusion_common::config::{ConfigEntry, ConfigOptions}; use datafusion_common::error::Result; use datafusion_common::types::NativeType; -use datafusion_common::DataFusionError; use datafusion_execution::TaskContext; +use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF}; use datafusion_expr::{TableType, Volatility}; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::Debug; use std::{any::Any, sync::Arc}; @@ -137,11 +138,11 @@ impl InformationSchemaConfig { let catalog = self.catalog_list.catalog(&catalog_name).unwrap(); for schema_name in catalog.schema_names() { - if schema_name != INFORMATION_SCHEMA { - if let Some(schema) = catalog.schema(&schema_name) { - let schema_owner = schema.owner_name(); - builder.add_schemata(&catalog_name, &schema_name, schema_owner); - } + if schema_name != INFORMATION_SCHEMA + && let Some(schema) = catalog.schema(&schema_name) + { + let schema_owner = schema.owner_name(); + builder.add_schemata(&catalog_name, &schema_name, schema_owner); } } } @@ -215,11 +216,16 @@ impl InformationSchemaConfig { fn make_df_settings( &self, config_options: &ConfigOptions, + runtime_env: &Arc, builder: &mut InformationSchemaDfSettingsBuilder, ) { for entry in config_options.entries() { builder.add_setting(entry); } + // Add runtime configuration entries + for entry in runtime_env.config_entries() { + builder.add_setting(entry); + } } fn 
make_routines( @@ -245,7 +251,7 @@ impl InformationSchemaConfig { name, "FUNCTION", Self::is_deterministic(udf.signature()), - return_type, + return_type.as_ref(), "SCALAR", udf.documentation().map(|d| d.description.to_string()), udf.documentation().map(|d| d.syntax_example.to_string()), @@ -265,7 +271,7 @@ impl InformationSchemaConfig { name, "FUNCTION", Self::is_deterministic(udaf.signature()), - return_type, + return_type.as_ref(), "AGGREGATE", udaf.documentation().map(|d| d.description.to_string()), udaf.documentation().map(|d| d.syntax_example.to_string()), @@ -285,7 +291,7 @@ impl InformationSchemaConfig { name, "FUNCTION", Self::is_deterministic(udwf.signature()), - return_type, + return_type.as_ref(), "WINDOW", udwf.documentation().map(|d| d.description.to_string()), udwf.documentation().map(|d| d.syntax_example.to_string()), @@ -418,11 +424,11 @@ fn get_udf_args_and_return_types( // only handle the function which implemented [`ScalarUDFImpl::return_type`] method let return_type = udf .return_type(&arg_types) - .map(|t| remove_native_type_prefix(NativeType::from(t))) + .map(|t| remove_native_type_prefix(&NativeType::from(t))) .ok(); let arg_types = arg_types .into_iter() - .map(|t| remove_native_type_prefix(NativeType::from(t))) + .map(|t| remove_native_type_prefix(&NativeType::from(t))) .collect::>(); (arg_types, return_type) }) @@ -445,10 +451,10 @@ fn get_udaf_args_and_return_types( let return_type = udaf .return_type(&arg_types) .ok() - .map(|t| remove_native_type_prefix(NativeType::from(t))); + .map(|t| remove_native_type_prefix(&NativeType::from(t))); let arg_types = arg_types .into_iter() - .map(|t| remove_native_type_prefix(NativeType::from(t))) + .map(|t| remove_native_type_prefix(&NativeType::from(t))) .collect::>(); (arg_types, return_type) }) @@ -470,7 +476,7 @@ fn get_udwf_args_and_return_types( // only handle the function which implemented [`ScalarUDFImpl::return_type`] method let arg_types = arg_types .into_iter() - .map(|t| remove_native_type_prefix(NativeType::from(t))) + .map(|t| remove_native_type_prefix(&NativeType::from(t))) .collect::>(); (arg_types, None) }) @@ -479,7 +485,7 @@ fn get_udwf_args_and_return_types( } #[inline] -fn remove_native_type_prefix(native_type: NativeType) -> String { +fn remove_native_type_prefix(native_type: &NativeType) -> String { format!("{native_type}") } @@ -679,7 +685,7 @@ impl InformationSchemaViewBuilder { catalog_name: impl AsRef, schema_name: impl AsRef, table_name: impl AsRef, - definition: Option>, + definition: Option<&(impl AsRef + ?Sized)>, ) { // Note: append_value is actually infallible. 
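Returning to the `CteWorkTable` hunk above: the legacy `scan` entry point is now a thin wrapper over `scan_with_args`. A hedged caller-side sketch of the same builder API, with argument types inferred from that hunk (`with_projection` appears to take `Option<&[usize]>`, `with_filters` `Option<&[Expr]>`):

```rust
use std::sync::Arc;

use datafusion_catalog::{ScanArgs, Session, TableProvider};
use datafusion_common::Result;
use datafusion_expr::Expr;
use datafusion_physical_plan::ExecutionPlan;

// Sketch only: drive any TableProvider through the ScanArgs path used above.
async fn scan_first_rows(
    provider: &dyn TableProvider,
    state: &dyn Session,
    filters: &[Expr],
) -> Result<Arc<dyn ExecutionPlan>> {
    let args = ScanArgs::default()
        .with_projection(Some(&[0, 2])) // keep columns 0 and 2
        .with_filters(Some(filters))
        .with_limit(Some(10));
    // ScanResult::into_inner() yields the physical plan, as in the hunk above.
    Ok(provider.scan_with_args(state, args).await?.into_inner())
}
```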
self.catalog_names.append_value(catalog_name.as_ref()); @@ -1060,7 +1066,12 @@ impl PartitionStream for InformationSchemaDfSettings { // TODO: Stream this futures::stream::once(async move { // create a mem table with the names of tables - config.make_df_settings(ctx.session_config().options(), &mut builder); + let runtime_env = ctx.runtime_env(); + config.make_df_settings( + ctx.session_config().options(), + &runtime_env, + &mut builder, + ); Ok(builder.finish()) }), )) @@ -1156,7 +1167,7 @@ struct InformationSchemaRoutinesBuilder { } impl InformationSchemaRoutinesBuilder { - #[allow(clippy::too_many_arguments)] + #[expect(clippy::too_many_arguments)] fn add_routine( &mut self, catalog_name: impl AsRef, @@ -1164,7 +1175,7 @@ impl InformationSchemaRoutinesBuilder { routine_name: impl AsRef, routine_type: impl AsRef, is_deterministic: bool, - data_type: Option>, + data_type: Option<&impl AsRef>, function_type: impl AsRef, description: Option>, syntax_example: Option>, @@ -1290,7 +1301,7 @@ struct InformationSchemaParametersBuilder { } impl InformationSchemaParametersBuilder { - #[allow(clippy::too_many_arguments)] + #[expect(clippy::too_many_arguments)] fn add_parameter( &mut self, specific_catalog: impl AsRef, @@ -1298,7 +1309,7 @@ impl InformationSchemaParametersBuilder { specific_name: impl AsRef, ordinal_position: u64, parameter_mode: impl AsRef, - parameter_name: Option>, + parameter_name: Option<&(impl AsRef + ?Sized)>, data_type: impl AsRef, parameter_default: Option>, is_variadic: bool, @@ -1397,7 +1408,9 @@ mod tests { // InformationSchemaConfig::make_tables used this before `table_type` // existed but should not, as it may be expensive. async fn table(&self, _: &str) -> Result>> { - panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type") + panic!( + "InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type" + ) } fn as_any(&self) -> &dyn Any { diff --git a/datafusion/catalog/src/lib.rs b/datafusion/catalog/src/lib.rs index 1c5e38438724e..d1cd3998fecf1 100644 --- a/datafusion/catalog/src/lib.rs +++ b/datafusion/catalog/src/lib.rs @@ -23,6 +23,8 @@ // Make sure fast / cheap clones on Arc are explicit: // https://github.com/apache/datafusion/issues/11143 #![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] +#![deny(clippy::allow_attributes)] //! Interfaces and default implementations of catalogs and schemas. //! 
@@ -46,13 +48,13 @@ mod dynamic_file; mod schema; mod table; +pub use r#async::*; pub use catalog::*; pub use datafusion_session::Session; pub use dynamic_file::catalog::*; pub use memory::{ MemTable, MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider, }; -pub use r#async::*; pub use schema::*; pub use table::*; diff --git a/datafusion/catalog/src/listing_schema.rs b/datafusion/catalog/src/listing_schema.rs index af96cfc15fc82..77fbea8577089 100644 --- a/datafusion/catalog/src/listing_schema.rs +++ b/datafusion/catalog/src/listing_schema.rs @@ -26,7 +26,7 @@ use crate::{SchemaProvider, TableProvider, TableProviderFactory}; use crate::Session; use datafusion_common::{ - internal_datafusion_err, DFSchema, DataFusionError, HashMap, TableReference, + DFSchema, DataFusionError, HashMap, TableReference, internal_datafusion_err, }; use datafusion_expr::CreateExternalTable; @@ -127,22 +127,13 @@ impl ListingSchemaProvider { .factory .create( state, - &CreateExternalTable { - schema: Arc::new(DFSchema::empty()), + &CreateExternalTable::builder( name, - location: table_url, - file_type: self.format.clone(), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options: Default::default(), - constraints: Default::default(), - column_defaults: Default::default(), - }, + table_url, + self.format.clone(), + Arc::new(DFSchema::empty()), + ) + .build(), ) .await?; let _ = diff --git a/datafusion/catalog/src/memory/schema.rs b/datafusion/catalog/src/memory/schema.rs index f1b3628f7affc..97a579b021617 100644 --- a/datafusion/catalog/src/memory/schema.rs +++ b/datafusion/catalog/src/memory/schema.rs @@ -20,7 +20,7 @@ use crate::{SchemaProvider, TableProvider}; use async_trait::async_trait; use dashmap::DashMap; -use datafusion_common::{exec_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_err}; use std::any::Any; use std::sync::Arc; diff --git a/datafusion/catalog/src/memory/table.rs b/datafusion/catalog/src/memory/table.rs index 90224f6a37bc3..47f773fe9befd 100644 --- a/datafusion/catalog/src/memory/table.rs +++ b/datafusion/catalog/src/memory/table.rs @@ -27,17 +27,17 @@ use crate::TableProvider; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::error::Result; -use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt}; +use datafusion_common::{Constraints, DFSchema, SchemaExt, not_impl_err, plan_err}; use datafusion_common_runtime::JoinSet; use datafusion_datasource::memory::{MemSink, MemorySourceConfig}; use datafusion_datasource::sink::DataSinkExec; use datafusion_datasource::source::DataSourceExec; use datafusion_expr::dml::InsertOp; use datafusion_expr::{Expr, SortExpr, TableType}; -use datafusion_physical_expr::{create_physical_sort_exprs, LexOrdering}; +use datafusion_physical_expr::{LexOrdering, create_physical_sort_exprs}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::{ - common, ExecutionPlan, ExecutionPlanProperties, Partitioning, + ExecutionPlan, ExecutionPlanProperties, Partitioning, common, }; use datafusion_session::Session; diff --git a/datafusion/catalog/src/schema.rs b/datafusion/catalog/src/schema.rs index 9ba55256f1824..c6299582813b4 100644 --- a/datafusion/catalog/src/schema.rs +++ b/datafusion/catalog/src/schema.rs @@ -19,7 +19,7 @@ //! representing collections of named tables. 
use async_trait::async_trait; -use datafusion_common::{exec_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_err}; use std::any::Any; use std::fmt::Debug; use std::sync::Arc; @@ -68,7 +68,7 @@ pub trait SchemaProvider: Debug + Sync + Send { /// /// If a table of the same name was already registered, returns "Table /// already exists" error. - #[allow(unused_variables)] + #[expect(unused_variables)] fn register_table( &self, name: String, @@ -81,7 +81,7 @@ pub trait SchemaProvider: Debug + Sync + Send { /// schema and returns the previously registered [`TableProvider`], if any. /// /// If no `name` table exists, returns Ok(None). - #[allow(unused_variables)] + #[expect(unused_variables)] fn deregister_table(&self, name: &str) -> Result>> { exec_err!("schema provider does not support deregistering tables") } diff --git a/datafusion/catalog/src/stream.rs b/datafusion/catalog/src/stream.rs index f4a2338b8eecb..bdd72a1b1d70b 100644 --- a/datafusion/catalog/src/stream.rs +++ b/datafusion/catalog/src/stream.rs @@ -28,7 +28,7 @@ use std::sync::Arc; use crate::{Session, TableProvider, TableProviderFactory}; use arrow::array::{RecordBatch, RecordBatchReader, RecordBatchWriter}; use arrow::datatypes::SchemaRef; -use datafusion_common::{config_err, plan_err, Constraints, DataFusionError, Result}; +use datafusion_common::{Constraints, DataFusionError, Result, config_err, plan_err}; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::sink::{DataSink, DataSinkExec}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; diff --git a/datafusion/catalog/src/streaming.rs b/datafusion/catalog/src/streaming.rs index 082e74dab9a15..31669171b291a 100644 --- a/datafusion/catalog/src/streaming.rs +++ b/datafusion/catalog/src/streaming.rs @@ -24,11 +24,11 @@ use crate::Session; use crate::TableProvider; use arrow::datatypes::SchemaRef; -use datafusion_common::{plan_err, DFSchema, Result}; +use datafusion_common::{DFSchema, Result, plan_err}; use datafusion_expr::{Expr, SortExpr, TableType}; -use datafusion_physical_expr::{create_physical_sort_exprs, LexOrdering}; -use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; +use datafusion_physical_expr::{LexOrdering, create_physical_sort_exprs}; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use async_trait::async_trait; use log::debug; diff --git a/datafusion/catalog/src/table.rs b/datafusion/catalog/src/table.rs index 11c9af01a7a54..cabdb22c62ae5 100644 --- a/datafusion/catalog/src/table.rs +++ b/datafusion/catalog/src/table.rs @@ -24,7 +24,7 @@ use crate::session::Session; use arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion_common::Result; -use datafusion_common::{not_impl_err, Constraints, Statistics}; +use datafusion_common::{Constraints, Statistics, not_impl_err}; use datafusion_expr::Expr; use datafusion_expr::dml::InsertOp; diff --git a/datafusion/catalog/src/view.rs b/datafusion/catalog/src/view.rs index 89c6a4a224511..54c54431a5913 100644 --- a/datafusion/catalog/src/view.rs +++ b/datafusion/catalog/src/view.rs @@ -24,8 +24,8 @@ use crate::TableProvider; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion_common::error::Result; use datafusion_common::Column; +use datafusion_common::error::Result; use datafusion_expr::TableType; use datafusion_expr::{Expr, LogicalPlan}; use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; diff --git 
a/datafusion/common-runtime/src/common.rs b/datafusion/common-runtime/src/common.rs index cebd6e04cd1b1..ca618b19ed2f1 100644 --- a/datafusion/common-runtime/src/common.rs +++ b/datafusion/common-runtime/src/common.rs @@ -44,7 +44,7 @@ impl SpawnedTask { R: Send, { // Ok to use spawn here as SpawnedTask handles aborting/cancelling the task on Drop - #[allow(clippy::disallowed_methods)] + #[expect(clippy::disallowed_methods)] let inner = tokio::task::spawn(trace_future(task)); Self { inner } } @@ -56,7 +56,7 @@ impl SpawnedTask { R: Send, { // Ok to use spawn_blocking here as SpawnedTask handles aborting/cancelling the task on Drop - #[allow(clippy::disallowed_methods)] + #[expect(clippy::disallowed_methods)] let inner = tokio::task::spawn_blocking(trace_block(task)); Self { inner } } @@ -115,14 +115,14 @@ impl Drop for SpawnedTask { mod tests { use super::*; - use std::future::{pending, Pending}; + use std::future::{Pending, pending}; use tokio::{runtime::Runtime, sync::oneshot}; #[tokio::test] async fn runtime_shutdown() { let rt = Runtime::new().unwrap(); - #[allow(clippy::async_yields_async)] + #[expect(clippy::async_yields_async)] let task = rt .spawn(async { SpawnedTask::spawn(async { diff --git a/datafusion/common-runtime/src/lib.rs b/datafusion/common-runtime/src/lib.rs index 5d404d99e7760..fdbfe7f2390ca 100644 --- a/datafusion/common-runtime/src/lib.rs +++ b/datafusion/common-runtime/src/lib.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] +#![deny(clippy::allow_attributes)] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" @@ -31,5 +33,5 @@ mod trace_utils; pub use common::SpawnedTask; pub use join_set::JoinSet; pub use trace_utils::{ - set_join_set_tracer, trace_block, trace_future, JoinSetTracer, JoinSetTracerError, + JoinSetTracer, JoinSetTracerError, set_join_set_tracer, trace_block, trace_future, }; diff --git a/datafusion/common-runtime/src/trace_utils.rs b/datafusion/common-runtime/src/trace_utils.rs index c3a39c355fc88..f8adbe8825bc1 100644 --- a/datafusion/common-runtime/src/trace_utils.rs +++ b/datafusion/common-runtime/src/trace_utils.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
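Many hunks in this diff swap `#[allow(...)]` for `#[expect(...)]` and add `#![deny(clippy::allow_attributes)]` at the crate root. The difference, in one self-contained crate (the function is a toy; `clippy::too_many_arguments` fires at eight parameters by default):

```rust
// `allow` would now be rejected by the crate-level lint below, while `expect`
// additionally warns (`unfulfilled_lint_expectations`) once the suppression
// is no longer needed, so stale attributes do not accumulate.
#![deny(clippy::allow_attributes)]

#[expect(clippy::too_many_arguments)]
fn widget(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u32 {
    u32::from(a) + u32::from(b) + u32::from(c) + u32::from(d)
        + u32::from(e) + u32::from(f) + u32::from(g) + u32::from(h)
}

fn main() {
    assert_eq!(widget(1, 1, 1, 1, 1, 1, 1, 1), 8);
}
```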
-use futures::future::BoxFuture; use futures::FutureExt; +use futures::future::BoxFuture; use std::any::Any; use std::error::Error; use std::fmt::{Display, Formatter, Result as FmtResult}; diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index b222ae12b92f5..262f50839563a 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -48,15 +48,18 @@ parquet_encryption = [ "parquet/encryption", "dep:hex", ] -pyarrow = ["pyo3", "arrow/pyarrow", "parquet"] force_hash_collisions = [] recursive_protection = ["dep:recursive"] parquet = ["dep:parquet"] sql = ["sqlparser"] +[[bench]] +harness = false +name = "with_hashes" + [dependencies] ahash = { workspace = true } -apache-avro = { version = "0.20", default-features = false, features = [ +apache-avro = { workspace = true, features = [ "bzip", "snappy", "xz", @@ -73,8 +76,7 @@ libc = "0.2.177" log = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } -paste = "1.0.15" -pyo3 = { version = "0.26", optional = true } +paste = { workspace = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } tokio = { workspace = true } @@ -84,6 +86,7 @@ web-time = "1.1.0" [dev-dependencies] chrono = { workspace = true } +criterion = { workspace = true } insta = { workspace = true } rand = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/common/benches/with_hashes.rs b/datafusion/common/benches/with_hashes.rs new file mode 100644 index 0000000000000..8154c20df88f3 --- /dev/null +++ b/datafusion/common/benches/with_hashes.rs @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Benchmarks for `with_hashes` function + +use ahash::RandomState; +use arrow::array::{ + Array, ArrayRef, ArrowPrimitiveType, DictionaryArray, GenericStringArray, + NullBufferBuilder, OffsetSizeTrait, PrimitiveArray, StringViewArray, make_array, +}; +use arrow::buffer::NullBuffer; +use arrow::datatypes::{ArrowDictionaryKeyType, Int32Type, Int64Type}; +use criterion::{Bencher, Criterion, criterion_group, criterion_main}; +use datafusion_common::hash_utils::with_hashes; +use rand::Rng; +use rand::SeedableRng; +use rand::distr::{Alphanumeric, Distribution, StandardUniform}; +use rand::prelude::StdRng; +use std::sync::Arc; + +const BATCH_SIZE: usize = 8192; + +struct BenchData { + name: &'static str, + array: ArrayRef, +} + +fn criterion_benchmark(c: &mut Criterion) { + let pool = StringPool::new(100, 64); + // poll with small strings for string view tests (<=12 bytes are inlined) + let small_pool = StringPool::new(100, 5); + let cases = [ + BenchData { + name: "int64", + array: primitive_array::(BATCH_SIZE), + }, + BenchData { + name: "utf8", + array: pool.string_array::(BATCH_SIZE), + }, + BenchData { + name: "large_utf8", + array: pool.string_array::(BATCH_SIZE), + }, + BenchData { + name: "utf8_view", + array: pool.string_view_array(BATCH_SIZE), + }, + BenchData { + name: "utf8_view (small)", + array: small_pool.string_view_array(BATCH_SIZE), + }, + BenchData { + name: "dictionary_utf8_int32", + array: pool.dictionary_array::(BATCH_SIZE), + }, + ]; + + for BenchData { name, array } in cases { + // with_hash has different code paths for single vs multiple arrays and nulls vs no nulls + let nullable_array = add_nulls(&array); + c.bench_function(&format!("{name}: single, no nulls"), |b| { + do_hash_test(b, std::slice::from_ref(&array)); + }); + c.bench_function(&format!("{name}: single, nulls"), |b| { + do_hash_test(b, std::slice::from_ref(&nullable_array)); + }); + c.bench_function(&format!("{name}: multiple, no nulls"), |b| { + let arrays = vec![array.clone(), array.clone(), array.clone()]; + do_hash_test(b, &arrays); + }); + c.bench_function(&format!("{name}: multiple, nulls"), |b| { + let arrays = vec![ + nullable_array.clone(), + nullable_array.clone(), + nullable_array.clone(), + ]; + do_hash_test(b, &arrays); + }); + } +} + +fn do_hash_test(b: &mut Bencher, arrays: &[ArrayRef]) { + let state = RandomState::new(); + b.iter(|| { + with_hashes(arrays, &state, |hashes| { + assert_eq!(hashes.len(), BATCH_SIZE); // make sure the result is used + Ok(()) + }) + .unwrap(); + }); +} + +fn create_null_mask(len: usize) -> NullBuffer +where + StandardUniform: Distribution, +{ + let mut rng = make_rng(); + let null_density = 0.03; + let mut builder = NullBufferBuilder::new(len); + for _ in 0..len { + if rng.random::() < null_density { + builder.append_null(); + } else { + builder.append_non_null(); + } + } + builder.finish().expect("should be nulls in buffer") +} + +// Returns an new array that is the same as array, but with nulls +fn add_nulls(array: &ArrayRef) -> ArrayRef { + let array_data = array + .clone() + .into_data() + .into_builder() + .nulls(Some(create_null_mask(array.len()))) + .build() + .unwrap(); + make_array(array_data) +} + +pub fn make_rng() -> StdRng { + StdRng::seed_from_u64(42) +} + +/// String pool for generating low cardinality data (for dictionaries and string views) +struct StringPool { + strings: Vec, +} + +impl StringPool { + /// Create a new string pool with the given number of random strings + /// each having between 1 and max_length characters. 
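For readers unfamiliar with the function under test: `with_hashes` computes one combined `u64` hash per row across the given columns and hands the hash buffer to a callback. A minimal usage sketch following the call shape in `do_hash_test` above (the exact closure and return types are assumed from that call site):

```rust
use std::sync::Arc;

use ahash::RandomState;
use arrow::array::{ArrayRef, Int64Array, StringArray};
use datafusion_common::hash_utils::with_hashes;

fn main() -> datafusion_common::Result<()> {
    // Two columns, four rows: each row hashes to a single u64.
    let ints: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 2, 3]));
    let strs: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "b", "c"]));
    let state = RandomState::new();

    with_hashes(&[ints, strs], &state, |hashes| {
        assert_eq!(hashes.len(), 4);
        // Identical rows (index 1 and 2) produce identical combined hashes.
        assert_eq!(hashes[1], hashes[2]);
        Ok(())
    })
}
```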
+ fn new(pool_size: usize, max_length: usize) -> Self { + let mut rng = make_rng(); + let mut strings = Vec::with_capacity(pool_size); + for _ in 0..pool_size { + let len = rng.random_range(1..=max_length); + let value: Vec = + rng.clone().sample_iter(&Alphanumeric).take(len).collect(); + strings.push(String::from_utf8(value).unwrap()); + } + Self { strings } + } + + /// Return an iterator over &str of the given length with values randomly chosen from the pool + fn iter_strings(&self, len: usize) -> impl Iterator { + let mut rng = make_rng(); + (0..len).map(move |_| { + let idx = rng.random_range(0..self.strings.len()); + self.strings[idx].as_str() + }) + } + + /// Return a StringArray of the given length with values randomly chosen from the pool + fn string_array(&self, array_length: usize) -> ArrayRef { + Arc::new(GenericStringArray::::from_iter_values( + self.iter_strings(array_length), + )) + } + + /// Return a StringViewArray of the given length with values randomly chosen from the pool + fn string_view_array(&self, array_length: usize) -> ArrayRef { + Arc::new(StringViewArray::from_iter_values( + self.iter_strings(array_length), + )) + } + + /// Return a DictionaryArray of the given length with values randomly chosen from the pool + fn dictionary_array( + &self, + array_length: usize, + ) -> ArrayRef { + Arc::new(DictionaryArray::::from_iter( + self.iter_strings(array_length), + )) + } +} + +pub fn primitive_array(array_len: usize) -> ArrayRef +where + T: ArrowPrimitiveType, + StandardUniform: Distribution, +{ + let mut rng = make_rng(); + + let array: PrimitiveArray = (0..array_len) + .map(|_| Some(rng.random::())) + .collect(); + Arc::new(array) +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs index b95167ca13908..29082cc303a70 100644 --- a/datafusion/common/src/cast.rs +++ b/datafusion/common/src/cast.rs @@ -20,11 +20,11 @@ //! but provide an error message rather than a panic, as the corresponding //! kernels in arrow-rs such as `as_boolean_array` do. -use crate::{downcast_value, Result}; +use crate::{Result, downcast_value}; use arrow::array::{ BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array, - Int16Array, Int8Array, LargeBinaryArray, LargeListViewArray, LargeStringArray, + Int8Array, Int16Array, LargeBinaryArray, LargeListViewArray, LargeStringArray, ListViewArray, StringViewArray, UInt16Array, }; use arrow::{ @@ -37,8 +37,8 @@ use arrow::{ MapArray, NullArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt32Array, UInt64Array, - UInt8Array, UnionArray, + TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt32Array, + UInt64Array, UnionArray, }, datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType}, }; diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 0ed499da04757..2bea2ec5a4526 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -157,12 +157,10 @@ macro_rules! config_namespace { // $(#[allow(deprecated)])? { $(let value = $transform(value);)? 
// Apply transformation if specified - #[allow(deprecated)] let ret = self.$field_name.set(rem, value.as_ref()); $(if !$warn.is_empty() { let default: $field_type = $default; - #[allow(deprecated)] if default != self.$field_name { log::warn!($warn); } @@ -181,14 +179,36 @@ macro_rules! config_namespace { $( let key = format!(concat!("{}.", stringify!($field_name)), key_prefix); let desc = concat!($($d),*).trim(); - #[allow(deprecated)] self.$field_name.visit(v, key.as_str(), desc); )* } + + fn reset(&mut self, key: &str) -> $crate::error::Result<()> { + let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { + $( + stringify!($field_name) => { + { + if rem.is_empty() { + let default_value: $field_type = $default; + self.$field_name = default_value; + Ok(()) + } else { + self.$field_name.reset(rem) + } + } + }, + )* + _ => $crate::error::_config_err!( + "Config value \"{}\" not found on {}", + key, + stringify!($struct_name) + ), + } + } } impl Default for $struct_name { fn default() -> Self { - #[allow(deprecated)] Self { $($field_name: $default),* } @@ -606,6 +626,29 @@ config_namespace! { /// written, it may be necessary to increase this size to avoid errors from /// the remote end point. pub objectstore_writer_buffer_size: usize, default = 10 * 1024 * 1024 + + /// Whether to enable ANSI SQL mode. + /// + /// The flag is experimental and relevant only for DataFusion Spark built-in functions + /// + /// When `enable_ansi_mode` is set to `true`, the query engine follows ANSI SQL + /// semantics for expressions, casting, and error handling. This means: + /// - **Strict type coercion rules:** implicit casts between incompatible types are disallowed. + /// - **Standard SQL arithmetic behavior:** operations such as division by zero, + /// numeric overflow, or invalid casts raise runtime errors rather than returning + /// `NULL` or adjusted values. + /// - **Consistent ANSI behavior** for string concatenation, comparisons, and `NULL` handling. + /// + /// When `enable_ansi_mode` is `false` (the default), the engine uses a more permissive, + /// non-ANSI mode designed for user convenience and backward compatibility. In this mode: + /// - Implicit casts between types are allowed (e.g., string to integer when possible). + /// - Arithmetic operations are more lenient — for example, `abs()` on the minimum + /// representable integer value returns the input value instead of raising overflow. + /// - Division by zero or invalid casts may return `NULL` instead of failing. + /// + /// # Default + /// `false` — ANSI SQL mode is disabled by default. + pub enable_ansi_mode: bool, default = false } } @@ -651,6 +694,12 @@ config_namespace! { /// the filters are applied in the same order as written in the query pub reorder_filters: bool, default = false + /// (reading) Force the use of RowSelections for filter results, when + /// pushdown_filters is enabled. If false, the reader will automatically + /// choose between a RowSelection and a Bitmap based on the number and + /// pattern of selected rows. + pub force_filter_selections: bool, default = false + /// (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, /// and `Binary/BinaryLarge` with `BinaryView`. pub schema_force_view_types: bool, default = true @@ -861,12 +910,16 @@ config_namespace! { /// into the file scan phase. 
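The `reset` hook added to `config_namespace!` at the start of this hunk (and to `ConfigOptions`/`ConfigField` later in the file) restores a single key to its compiled-in default. A hedged sketch, assuming the new method is reachable through the public `ConfigField` trait and that `batch_size` still defaults to 8192:

```rust
use datafusion_common::config::{ConfigField, ConfigOptions};

fn main() -> datafusion_common::Result<()> {
    let mut options = ConfigOptions::new();

    options.set("datafusion.execution.batch_size", "1024")?;
    assert_eq!(options.execution.batch_size, 1024);

    // New in this diff: put the key back to its default value.
    options.reset("datafusion.execution.batch_size")?;
    assert_eq!(options.execution.batch_size, 8192);

    // Resetting `enable_dynamic_filter_pushdown` also restores the
    // TopK / Join / Aggregate sub-flags, mirroring the special case in set().
    options.reset("datafusion.optimizer.enable_dynamic_filter_pushdown")?;
    Ok(())
}
```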
pub enable_join_dynamic_filter_pushdown: bool, default = true - /// When set to true attempts to push down dynamic filters generated by operators (topk & join) into the file scan phase. + /// When set to true, the optimizer will attempt to push down Aggregate dynamic filters + /// into the file scan phase. + pub enable_aggregate_dynamic_filter_pushdown: bool, default = true + + /// When set to true attempts to push down dynamic filters generated by operators (TopK, Join & Aggregate) into the file scan phase. /// For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer /// will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. /// This means that if we already have 10 timestamps in the year 2025 /// any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. - /// The config will suppress `enable_join_dynamic_filter_pushdown` & `enable_topk_dynamic_filter_pushdown` + /// The config will suppress `enable_join_dynamic_filter_pushdown`, `enable_topk_dynamic_filter_pushdown` & `enable_aggregate_dynamic_filter_pushdown` /// So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. pub enable_dynamic_filter_pushdown: bool, default = true @@ -912,6 +965,19 @@ config_namespace! { /// record tables provided to the MemTable on creation. pub repartition_file_scans: bool, default = true + /// Minimum number of distinct partition values required to group files by their + /// Hive partition column values (enabling Hash partitioning declaration). + /// + /// How the option is used: + /// - preserve_file_partitions=0: Disable it. + /// - preserve_file_partitions=1: Always enable it. + /// - preserve_file_partitions=N, actual file partitions=M: Only enable when M >= N. + /// This threshold preserves I/O parallelism when file partitioning is below it. + /// + /// Note: This may reduce parallelism, rooting from the I/O level, if the number of distinct + /// partitions is less than the target_partitions. + pub preserve_file_partitions: usize, default = 0 + /// Should DataFusion repartition data using the partitions keys to execute window /// functions in parallel using the provided `target_partitions` level pub repartition_windows: bool, default = true @@ -934,6 +1000,34 @@ config_namespace! { /// ``` pub repartition_sorts: bool, default = true + /// Partition count threshold for subset satisfaction optimization. + /// + /// When the current partition count is >= this threshold, DataFusion will + /// skip repartitioning if the required partitioning expression is a subset + /// of the current partition expression such as Hash(a) satisfies Hash(a, b). + /// + /// When the current partition count is < this threshold, DataFusion will + /// repartition to increase parallelism even when subset satisfaction applies. + /// + /// Set to 0 to always repartition (disable subset satisfaction optimization). + /// Set to a high value to always use subset satisfaction. 
+ /// + /// Example (subset_repartition_threshold = 4): + /// ```text + /// Hash([a]) satisfies Hash([a, b]) because (Hash([a, b]) is subset of Hash([a]) + /// + /// If current partitions (3) < threshold (4), repartition: + /// AggregateExec: mode=FinalPartitioned, gby=[a, b], aggr=[SUM(x)] + /// RepartitionExec: partitioning=Hash([a, b], 8), input_partitions=3 + /// AggregateExec: mode=Partial, gby=[a, b], aggr=[SUM(x)] + /// DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 3) + /// + /// If current partitions (8) >= threshold (4), use subset satisfaction: + /// AggregateExec: mode=SinglePartitioned, gby=[a, b], aggr=[SUM(x)] + /// DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 8) + /// ``` + pub subset_repartition_threshold: usize, default = 4 + /// When true, DataFusion will opportunistically remove sorts when the data is already sorted, /// (i.e. setting `preserve_order` to true on `RepartitionExec` and /// using `SortPreservingMergeExec`) @@ -971,6 +1065,36 @@ config_namespace! { /// will be collected into a single partition pub hash_join_single_partition_threshold_rows: usize, default = 1024 * 128 + /// Maximum size in bytes for the build side of a hash join to be pushed down as an InList expression for dynamic filtering. + /// Build sides larger than this will use hash table lookups instead. + /// Set to 0 to always use hash table lookups. + /// + /// InList pushdown can be more efficient for small build sides because it can result in better + /// statistics pruning as well as use any bloom filters present on the scan side. + /// InList expressions are also more transparent and easier to serialize over the network in distributed uses of DataFusion. + /// On the other hand InList pushdown requires making a copy of the data and thus adds some overhead to the build side and uses more memory. + /// + /// This setting is per-partition, so we may end up using `hash_join_inlist_pushdown_max_size` * `target_partitions` memory. + /// + /// The default is 128kB per partition. + /// This should allow point lookup joins (e.g. joining on a unique primary key) to use InList pushdown in most cases + /// but avoids excessive memory usage or overhead for larger joins. + pub hash_join_inlist_pushdown_max_size: usize, default = 128 * 1024 + + /// Maximum number of distinct values (rows) in the build side of a hash join to be pushed down as an InList expression for dynamic filtering. + /// Build sides with more rows than this will use hash table lookups instead. + /// Set to 0 to always use hash table lookups. + /// + /// This provides an additional limit beyond `hash_join_inlist_pushdown_max_size` to prevent + /// very large IN lists that might not provide much benefit over hash table lookups. + /// + /// This uses the deduplicated row count once the build side has been evaluated. + /// + /// The default is 150 values per partition. + /// This is inspired by Trino's `max-filter-keys-per-column` setting. + /// See: + pub hash_join_inlist_pushdown_max_distinct_values: usize, default = 150 + /// The default filter selectivity used by Filter Statistics /// when an exact selectivity cannot be determined. Valid values are /// between 0 (no selectivity) and 100 (all rows are selected). @@ -983,6 +1107,21 @@ config_namespace! { /// then the output will be coerced to a non-view. /// Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. pub expand_views_at_output: bool, default = false + + /// Enable sort pushdown optimization. 
+ /// When enabled, attempts to push sort requirements down to data sources + /// that can natively handle them (e.g., by reversing file/row group read order). + /// + /// Returns **inexact ordering**: Sort operator is kept for correctness, + /// but optimized input enables early termination for TopK queries (ORDER BY ... LIMIT N), + /// providing significant speedup. + /// + /// Memory: No additional overhead (only changes read order). + /// + /// Future: Will add option to detect perfectly sorted data and eliminate Sort completely. + /// + /// Default: true + pub enable_sort_pushdown: bool, default = true } } @@ -1073,7 +1212,7 @@ impl<'a> TryInto> for &'a FormatOptions return _config_err!( "Invalid duration format: {}. Valid values are pretty or iso8601", self.duration_format - ) + ); } }; @@ -1124,6 +1263,15 @@ pub struct ConfigOptions { } impl ConfigField for ConfigOptions { + fn visit(&self, v: &mut V, _key_prefix: &str, _description: &'static str) { + self.catalog.visit(v, "datafusion.catalog", ""); + self.execution.visit(v, "datafusion.execution", ""); + self.optimizer.visit(v, "datafusion.optimizer", ""); + self.explain.visit(v, "datafusion.explain", ""); + self.sql_parser.visit(v, "datafusion.sql_parser", ""); + self.format.visit(v, "datafusion.format", ""); + } + fn set(&mut self, key: &str, value: &str) -> Result<()> { // Extensions are handled in the public `ConfigOptions::set` let (key, rem) = key.split_once('.').unwrap_or((key, "")); @@ -1138,13 +1286,43 @@ impl ConfigField for ConfigOptions { } } - fn visit(&self, v: &mut V, _key_prefix: &str, _description: &'static str) { - self.catalog.visit(v, "datafusion.catalog", ""); - self.execution.visit(v, "datafusion.execution", ""); - self.optimizer.visit(v, "datafusion.optimizer", ""); - self.explain.visit(v, "datafusion.explain", ""); - self.sql_parser.visit(v, "datafusion.sql_parser", ""); - self.format.visit(v, "datafusion.format", ""); + /// Reset a configuration option back to its default value + fn reset(&mut self, key: &str) -> Result<()> { + let Some((prefix, rest)) = key.split_once('.') else { + return _config_err!("could not find config namespace for key \"{key}\""); + }; + + if prefix != "datafusion" { + return _config_err!("Could not find config namespace \"{prefix}\""); + } + + let (section, rem) = rest.split_once('.').unwrap_or((rest, "")); + if rem.is_empty() { + return _config_err!("could not find config field for key \"{key}\""); + } + + match section { + "catalog" => self.catalog.reset(rem), + "execution" => self.execution.reset(rem), + "optimizer" => { + if rem == "enable_dynamic_filter_pushdown" { + let defaults = OptimizerOptions::default(); + self.optimizer.enable_dynamic_filter_pushdown = + defaults.enable_dynamic_filter_pushdown; + self.optimizer.enable_topk_dynamic_filter_pushdown = + defaults.enable_topk_dynamic_filter_pushdown; + self.optimizer.enable_join_dynamic_filter_pushdown = + defaults.enable_join_dynamic_filter_pushdown; + Ok(()) + } else { + self.optimizer.reset(rem) + } + } + "explain" => self.explain.reset(rem), + "sql_parser" => self.sql_parser.reset(rem), + "format" => self.format.reset(rem), + other => _config_err!("Config value \"{other}\" not found on ConfigOptions"), + } } } @@ -1178,6 +1356,7 @@ impl ConfigOptions { self.optimizer.enable_dynamic_filter_pushdown = bool_value; self.optimizer.enable_topk_dynamic_filter_pushdown = bool_value; self.optimizer.enable_join_dynamic_filter_pushdown = bool_value; + self.optimizer.enable_aggregate_dynamic_filter_pushdown = bool_value; } return 
Ok(()); } @@ -1437,6 +1616,14 @@ impl Extensions { let e = self.0.get_mut(T::PREFIX)?; e.0.as_any_mut().downcast_mut() } + + /// Iterates all the config extension entries yielding their prefix and their + /// [ExtensionOptions] implementation. + pub fn iter( + &self, + ) -> impl Iterator)> { + self.0.iter().map(|(k, v)| (*k, &v.0)) + } } #[derive(Debug)] @@ -1454,6 +1641,10 @@ pub trait ConfigField { fn visit(&self, v: &mut V, key: &str, description: &'static str); fn set(&mut self, key: &str, value: &str) -> Result<()>; + + fn reset(&mut self, key: &str) -> Result<()> { + _config_err!("Reset is not supported for this config field, key: {}", key) + } } impl ConfigField for Option { @@ -1467,6 +1658,15 @@ impl ConfigField for Option { fn set(&mut self, key: &str, value: &str) -> Result<()> { self.get_or_insert_with(Default::default).set(key, value) } + + fn reset(&mut self, key: &str) -> Result<()> { + if key.is_empty() { + *self = Default::default(); + Ok(()) + } else { + self.get_or_insert_with(Default::default).reset(key) + } + } } /// Default transformation to parse a [`ConfigField`] for a string. @@ -1531,6 +1731,19 @@ macro_rules! config_field { *self = $transform; Ok(()) } + + fn reset(&mut self, key: &str) -> $crate::error::Result<()> { + if key.is_empty() { + *self = <$t as Default>::default(); + Ok(()) + } else { + $crate::error::_config_err!( + "Config field is a scalar {} and does not have nested field \"{}\"", + stringify!($t), + key + ) + } + } } }; } @@ -1540,6 +1753,7 @@ config_field!(bool, value => default_config_transform(value.to_lowercase().as_st config_field!(usize); config_field!(f64); config_field!(u64); +config_field!(u32); impl ConfigField for u8 { fn visit(&self, v: &mut V, key: &str, description: &'static str) { @@ -1730,8 +1944,7 @@ macro_rules! extensions_options { // Safely apply deprecated attribute if present // $(#[allow(deprecated)])? { - #[allow(deprecated)] - self.$field_name.set(rem, value.as_ref()) + self.$field_name.set(rem, value.as_ref()) } }, )* @@ -1745,7 +1958,6 @@ macro_rules! extensions_options { $( let key = stringify!($field_name).to_string(); let desc = concat!($($d),*).trim(); - #[allow(deprecated)] self.$field_name.visit(v, key.as_str(), desc); )* } @@ -2136,13 +2348,13 @@ impl ConfigField for TableParquetOptions { [_meta] | [_meta, ""] => { return _config_err!( "Invalid metadata key provided, missing key in metadata::" - ) + ); } [_meta, k] => k.into(), _ => { return _config_err!( "Invalid metadata key provided, found too many '::' in \"{key}\"" - ) + ); } }; self.key_value_metadata.insert(k, Some(value.into())); @@ -2188,7 +2400,6 @@ macro_rules! config_namespace_with_hashmap { $( stringify!($field_name) => { // Handle deprecated fields - #[allow(deprecated)] // Allow deprecated fields $(let value = $transform(value);)? self.$field_name.set(rem, value.as_ref()) }, @@ -2204,7 +2415,6 @@ macro_rules! config_namespace_with_hashmap { let key = format!(concat!("{}.", stringify!($field_name)), key_prefix); let desc = concat!($($d),*).trim(); // Handle deprecated fields - #[allow(deprecated)] self.$field_name.visit(v, key.as_str(), desc); )* } @@ -2212,7 +2422,6 @@ macro_rules! config_namespace_with_hashmap { impl Default for $struct_name { fn default() -> Self { - #[allow(deprecated)] Self { $($field_name: $default),* } @@ -2240,7 +2449,6 @@ macro_rules! 
config_namespace_with_hashmap { $( let key = format!("{}.{field}::{}", key_prefix, column_name, field = stringify!($field_name)); let desc = concat!($($d),*).trim(); - #[allow(deprecated)] col_options.$field_name.visit(v, key.as_str(), desc); )* } @@ -2539,7 +2747,7 @@ impl ConfigField for ConfigFileDecryptionProperties { self.footer_signature_verification.set(rem, value.as_ref()) } _ => _config_err!( - "Config value \"{}\" not found on ConfigFileEncryptionProperties", + "Config value \"{}\" not found on ConfigFileDecryptionProperties", key ), } @@ -2665,6 +2873,14 @@ config_namespace! { /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. pub newlines_in_values: Option, default = None pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED + /// Compression level for the output file. The valid range depends on the + /// compression algorithm: + /// - ZSTD: 1 to 22 (default: 3) + /// - GZIP: 0 to 9 (default: 6) + /// - BZIP2: 0 to 9 (default: 6) + /// - XZ: 0 to 9 (default: 6) + /// If not specified, the default level for the compression algorithm is used. + pub compression_level: Option, default = None pub schema_infer_max_rec: Option, default = None pub date_format: Option, default = None pub datetime_format: Option, default = None @@ -2787,6 +3003,14 @@ impl CsvOptions { self } + /// Set the compression level for the output file. + /// The valid range depends on the compression algorithm. + /// If not specified, the default level for the algorithm is used. + pub fn with_compression_level(mut self, level: u32) -> Self { + self.compression_level = Some(level); + self + } + /// The delimiter character. pub fn delimiter(&self) -> u8 { self.delimiter @@ -2812,6 +3036,14 @@ config_namespace! { /// Options controlling JSON format pub struct JsonOptions { pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED + /// Compression level for the output file. The valid range depends on the + /// compression algorithm: + /// - ZSTD: 1 to 22 (default: 3) + /// - GZIP: 0 to 9 (default: 6) + /// - BZIP2: 0 to 9 (default: 6) + /// - XZ: 0 to 9 (default: 6) + /// If not specified, the default level for the compression algorithm is used. + pub compression_level: Option, default = None pub schema_infer_max_rec: Option, default = None } } @@ -2819,7 +3051,7 @@ config_namespace! 
{ pub trait OutputFormatExt: Display {} #[derive(Debug, Clone, PartialEq)] -#[allow(clippy::large_enum_variant)] +#[cfg_attr(feature = "parquet", expect(clippy::large_enum_variant))] pub enum OutputFormat { CSV(CsvOptions), JSON(JsonOptions), @@ -2853,7 +3085,6 @@ mod tests { }; use std::any::Any; use std::collections::HashMap; - use std::sync::Arc; #[derive(Default, Debug, Clone)] pub struct TestExtensionConfig { @@ -2925,6 +3156,16 @@ mod tests { ); } + #[test] + fn iter_test_extension_config() { + let mut extension = Extensions::new(); + extension.insert(TestExtensionConfig::default()); + let table_config = TableOptions::new().with_extensions(extension); + let extensions = table_config.extensions.iter().collect::>(); + assert_eq!(extensions.len(), 1); + assert_eq!(extensions[0].0, TestExtensionConfig::PREFIX); + } + #[test] fn csv_u8_table_options() { let mut table_config = TableOptions::new(); @@ -2968,6 +3209,19 @@ mod tests { assert_eq!(COUNT.load(std::sync::atomic::Ordering::Relaxed), 1); } + #[test] + fn reset_nested_scalar_reports_helpful_error() { + let mut value = true; + let err = ::reset(&mut value, "nested").unwrap_err(); + let message = err.to_string(); + assert!( + message.starts_with( + "Invalid or Unsupported Configuration: Config field is a scalar bool and does not have nested field \"nested\"" + ), + "unexpected error message: {message}" + ); + } + #[cfg(feature = "parquet")] #[test] fn parquet_table_options() { @@ -2990,6 +3244,7 @@ mod tests { }; use parquet::encryption::decrypt::FileDecryptionProperties; use parquet::encryption::encrypt::FileEncryptionProperties; + use std::sync::Arc; let footer_key = b"0123456789012345".to_vec(); // 128bit/16 let column_names = vec!["double_field", "float_field"]; @@ -3143,9 +3398,11 @@ mod tests { .set("format.bloom_filter_enabled::col1", "true") .unwrap(); let entries = table_config.entries(); - assert!(entries - .iter() - .any(|item| item.key == "format.bloom_filter_enabled::col1")) + assert!( + entries + .iter() + .any(|item| item.key == "format.bloom_filter_enabled::col1") + ) } #[cfg(feature = "parquet")] @@ -3159,10 +3416,10 @@ mod tests { ) .unwrap(); let entries = table_parquet_options.entries(); - assert!(entries - .iter() - .any(|item| item.key - == "crypto.file_encryption.column_key_as_hex::double_field")) + assert!( + entries.iter().any(|item| item.key + == "crypto.file_encryption.column_key_as_hex::double_field") + ) } #[cfg(feature = "parquet")] diff --git a/datafusion/common/src/cse.rs b/datafusion/common/src/cse.rs index 674d3386171f8..93169d6a02ff1 100644 --- a/datafusion/common/src/cse.rs +++ b/datafusion/common/src/cse.rs @@ -19,12 +19,12 @@ //! a [`CSEController`], that defines how to eliminate common subtrees from a particular //! [`TreeNode`] tree. 
+use crate::Result; use crate::hash_utils::combine_hashes; use crate::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, }; -use crate::Result; use indexmap::IndexMap; use std::collections::HashMap; use std::hash::{BuildHasher, Hash, Hasher, RandomState}; @@ -676,13 +676,13 @@ where #[cfg(test)] mod test { + use crate::Result; use crate::alias::AliasGenerator; use crate::cse::{ - CSEController, HashNode, IdArray, Identifier, NodeStats, NormalizeEq, - Normalizeable, CSE, + CSE, CSEController, HashNode, IdArray, Identifier, NodeStats, NormalizeEq, + Normalizeable, }; use crate::tree_node::tests::TestTreeNode; - use crate::Result; use std::collections::HashSet; use std::hash::{Hash, Hasher}; diff --git a/datafusion/common/src/datatype.rs b/datafusion/common/src/datatype.rs index 65f6395211866..19847f8583505 100644 --- a/datafusion/common/src/datatype.rs +++ b/datafusion/common/src/datatype.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -//! [`DataTypeExt`] and [`FieldExt`] extension trait for working with DataTypes to Fields +//! [`DataTypeExt`] and [`FieldExt`] extension trait for working with Arrow [`DataType`] and [`Field`]s use crate::arrow::datatypes::{DataType, Field, FieldRef}; +use crate::metadata::FieldMetadata; use std::sync::Arc; /// DataFusion extension methods for Arrow [`DataType`] @@ -61,7 +62,54 @@ impl DataTypeExt for DataType { } /// DataFusion extension methods for Arrow [`Field`] and [`FieldRef`] +/// +/// This trait is implemented for both [`Field`] and [`FieldRef`] and +/// provides convenience methods for efficiently working with both types. +/// +/// For [`FieldRef`], the methods will attempt to unwrap the `Arc` +/// to avoid unnecessary cloning when possible. pub trait FieldExt { + /// Ensure the field is named `new_name`, returning the given field if the + /// name matches, and a new field if not. + /// + /// This method avoids `clone`ing fields and names if the name is the same + /// as the field's existing name. + /// + /// Example: + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::datatype::FieldExt; + /// let int_field = Field::new("my_int", DataType::Int32, true); + /// // rename to "your_int" + /// let renamed_field = int_field.renamed("your_int"); + /// assert_eq!(renamed_field.name(), "your_int"); + /// ``` + fn renamed(self, new_name: &str) -> Self; + + /// Ensure the field has the given data type + /// + /// Note this is different than simply calling [`Field::with_data_type`] as + /// it avoids copying if the data type is already the same. + /// + /// Example: + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::datatype::FieldExt; + /// let int_field = Field::new("my_int", DataType::Int32, true); + /// // change to Float64 + /// let retyped_field = int_field.retyped(DataType::Float64); + /// assert_eq!(retyped_field.data_type(), &DataType::Float64); + /// ``` + fn retyped(self, new_data_type: DataType) -> Self; + + /// Add field metadata to the Field + fn with_field_metadata(self, metadata: &FieldMetadata) -> Self; + + /// Add optional field metadata, + fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self; + /// Returns a new Field representing a List of this Field's DataType. 
/// /// For example if input represents an `Int32`, the return value will @@ -130,6 +178,32 @@ pub trait FieldExt { impl FieldExt for Field { + fn renamed(self, new_name: &str) -> Self { + // check if this is a new name before allocating a new Field / copying + // the existing one + if self.name() != new_name { + self.with_name(new_name) + } else { + self + } + } + + fn retyped(self, new_data_type: DataType) -> Self { + self.with_data_type(new_data_type) + } + + fn with_field_metadata(self, metadata: &FieldMetadata) -> Self { + metadata.add_to_field(self) + } + + fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self { + if let Some(metadata) = metadata { + self.with_field_metadata(metadata) + } else { + self + } + } + fn into_list(self) -> Self { DataType::List(Arc::new(self.into_list_item())).into_nullable_field() } @@ -149,6 +223,34 @@ impl FieldExt for Field { } impl FieldExt for Arc { + fn renamed(mut self, new_name: &str) -> Self { + if self.name() != new_name { + // avoid cloning if possible + Arc::make_mut(&mut self).set_name(new_name); + } + self + } + + fn retyped(mut self, new_data_type: DataType) -> Self { + if self.data_type() != &new_data_type { + // avoid cloning if possible + Arc::make_mut(&mut self).set_data_type(new_data_type); + } + self + } + + fn with_field_metadata(self, metadata: &FieldMetadata) -> Self { + metadata.add_to_field_ref(self) + } + + fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self { + if let Some(metadata) = metadata { + self.with_field_metadata(metadata) + } else { + self + } + } + fn into_list(self) -> Self { DataType::List(self.into_list_item()) .into_nullable_field() @@ -161,13 +263,11 @@ impl FieldExt for Arc { .into() } - fn into_list_item(self) -> Self { if self.name() != Field::LIST_FIELD_DEFAULT_NAME { - Arc::unwrap_or_clone(self) - .with_name(Field::LIST_FIELD_DEFAULT_NAME) - .into() - } else { - self + fn into_list_item(mut self) -> Self { + if self.name() != Field::LIST_FIELD_DEFAULT_NAME { + // avoid cloning if possible + Arc::make_mut(&mut self).set_name(Field::LIST_FIELD_DEFAULT_NAME); } + self } } diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 24d152a7dba8c..55a031d870122 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -23,10 +23,10 @@ use std::fmt::{Display, Formatter}; use std::hash::Hash; use std::sync::Arc; -use crate::error::{DataFusionError, Result, _plan_err, _schema_err}; +use crate::error::{_plan_err, _schema_err, DataFusionError, Result}; use crate::{ - field_not_found, unqualified_field_not_found, Column, FunctionalDependencies, - SchemaError, TableReference, + Column, FunctionalDependencies, SchemaError, TableReference, field_not_found, + unqualified_field_not_found, }; use arrow::compute::can_cast_types; @@ -37,7 +37,7 @@ use arrow::datatypes::{ /// A reference-counted reference to a [DFSchema]. pub type DFSchemaRef = Arc; -/// DFSchema wraps an Arrow schema and adds relation names. +/// DFSchema wraps an Arrow schema and adds a relation (table) name. /// /// The schema may hold the fields across multiple tables. Some fields may be /// qualified and some unqualified. A qualified field is a field that has a @@ -47,8 +47,14 @@ pub type DFSchemaRef = Arc; /// have a distinct name from any qualified field names. This allows finding a /// qualified field by name to be possible, so long as there aren't multiple /// qualified fields with the same name.
+/// +/// # See Also +/// * [DFSchemaRef], an alias to `Arc` +/// * [DataTypeExt], common methods for working with Arrow [DataType]s +/// * [FieldExt], extension methods for working with Arrow [Field]s /// -/// There is an alias to `Arc` named [DFSchemaRef]. +/// [DataTypeExt]: crate::datatype::DataTypeExt +/// [FieldExt]: crate::datatype::FieldExt /// /// # Creating qualified schemas /// @@ -346,20 +352,22 @@ impl DFSchema { self.field_qualifiers.extend(qualifiers); } - /// Get a list of fields + /// Get a list of fields for this schema pub fn fields(&self) -> &Fields { &self.inner.fields } - /// Returns an immutable reference of a specific `Field` instance selected using an - /// offset within the internal `fields` vector - pub fn field(&self, i: usize) -> &Field { + /// Returns a reference to [`FieldRef`] for a column at a specific index + /// within the schema. + /// + /// See also [Self::qualified_field] to get both qualifier and field + pub fn field(&self, i: usize) -> &FieldRef { &self.inner.fields[i] } - /// Returns an immutable reference of a specific `Field` instance selected using an - /// offset within the internal `fields` vector and its qualifier - pub fn qualified_field(&self, i: usize) -> (Option<&TableReference>, &Field) { + /// Returns the qualifier (if any) and [`FieldRef`] for a column at a specific + /// index within the schema. + pub fn qualified_field(&self, i: usize) -> (Option<&TableReference>, &FieldRef) { (self.field_qualifiers[i].as_ref(), self.field(i)) } @@ -410,12 +418,12 @@ impl DFSchema { .is_some() } - /// Find the field with the given name + /// Find the [`FieldRef`] with the given name and optional qualifier pub fn field_with_name( &self, qualifier: Option<&TableReference>, name: &str, - ) -> Result<&Field> { + ) -> Result<&FieldRef> { if let Some(qualifier) = qualifier { self.field_with_qualified_name(qualifier, name) } else { @@ -428,7 +436,7 @@ impl DFSchema { &self, qualifier: Option<&TableReference>, name: &str, - ) -> Result<(Option<&TableReference>, &Field)> { + ) -> Result<(Option<&TableReference>, &FieldRef)> { if let Some(qualifier) = qualifier { let idx = self .index_of_column_by_name(Some(qualifier), name) @@ -440,10 +448,10 @@ impl DFSchema { } /// Find all fields having the given qualifier - pub fn fields_with_qualified(&self, qualifier: &TableReference) -> Vec<&Field> { + pub fn fields_with_qualified(&self, qualifier: &TableReference) -> Vec<&FieldRef> { self.iter() .filter(|(q, _)| q.map(|q| q.eq(qualifier)).unwrap_or(false)) - .map(|(_, f)| f.as_ref()) + .map(|(_, f)| f) .collect() } @@ -459,11 +467,10 @@ impl DFSchema { } /// Find all fields that match the given name - pub fn fields_with_unqualified_name(&self, name: &str) -> Vec<&Field> { + pub fn fields_with_unqualified_name(&self, name: &str) -> Vec<&FieldRef> { self.fields() .iter() .filter(|field| field.name() == name) - .map(|f| f.as_ref()) .collect() } @@ -471,10 +478,9 @@ impl DFSchema { pub fn qualified_fields_with_unqualified_name( &self, name: &str, - ) -> Vec<(Option<&TableReference>, &Field)> { + ) -> Vec<(Option<&TableReference>, &FieldRef)> { self.iter() .filter(|(_, field)| field.name() == name) - .map(|(qualifier, field)| (qualifier, field.as_ref())) .collect() } @@ -499,7 +505,7 @@ impl DFSchema { pub fn qualified_field_with_unqualified_name( &self, name: &str, - ) -> Result<(Option<&TableReference>, &Field)> { + ) -> Result<(Option<&TableReference>, &FieldRef)> { let matches = self.qualified_fields_with_unqualified_name(name); match matches.len() { 0 =>
Err(unqualified_field_not_found(name, self)), @@ -528,7 +534,7 @@ impl DFSchema { } /// Find the field with the given name - pub fn field_with_unqualified_name(&self, name: &str) -> Result<&Field> { + pub fn field_with_unqualified_name(&self, name: &str) -> Result<&FieldRef> { self.qualified_field_with_unqualified_name(name) .map(|(_, field)| field) } @@ -538,7 +544,7 @@ impl DFSchema { &self, qualifier: &TableReference, name: &str, - ) -> Result<&Field> { + ) -> Result<&FieldRef> { let idx = self .index_of_column_by_name(Some(qualifier), name) .ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?; @@ -550,7 +556,7 @@ impl DFSchema { pub fn qualified_field_from_column( &self, column: &Column, - ) -> Result<(Option<&TableReference>, &Field)> { + ) -> Result<(Option<&TableReference>, &FieldRef)> { self.qualified_field_with_name(column.relation.as_ref(), &column.name) } @@ -982,36 +988,35 @@ fn format_field_with_indent( result.push_str(&format!( "{indent}|-- {field_name}: map (nullable = {nullable_str})\n" )); - if let DataType::Struct(inner_fields) = field.data_type() { - if inner_fields.len() == 2 { - format_field_with_indent( - result, - "key", - inner_fields[0].data_type(), - inner_fields[0].is_nullable(), - &child_indent, - ); - let value_contains_null = - field.is_nullable().to_string().to_lowercase(); - // Handle complex value types properly - match inner_fields[1].data_type() { - DataType::Struct(_) - | DataType::List(_) - | DataType::LargeList(_) - | DataType::FixedSizeList(_, _) - | DataType::Map(_, _) => { - format_field_with_indent( - result, - "value", - inner_fields[1].data_type(), - inner_fields[1].is_nullable(), - &child_indent, - ); - } - _ => { - result.push_str(&format!("{child_indent}|-- value: {} (nullable = {value_contains_null})\n", + if let DataType::Struct(inner_fields) = field.data_type() + && inner_fields.len() == 2 + { + format_field_with_indent( + result, + "key", + inner_fields[0].data_type(), + inner_fields[0].is_nullable(), + &child_indent, + ); + let value_contains_null = field.is_nullable().to_string().to_lowercase(); + // Handle complex value types properly + match inner_fields[1].data_type() { + DataType::Struct(_) + | DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) + | DataType::Map(_, _) => { + format_field_with_indent( + result, + "value", + inner_fields[1].data_type(), + inner_fields[1].is_nullable(), + &child_indent, + ); + } + _ => { + result.push_str(&format!("{child_indent}|-- value: {} (nullable = {value_contains_null})\n", format_simple_data_type(inner_fields[1].data_type()))); - } } } } @@ -1221,7 +1226,7 @@ pub trait ExprSchema: std::fmt::Debug { } // Return the column's field - fn field_from_column(&self, col: &Column) -> Result<&Field>; + fn field_from_column(&self, col: &Column) -> Result<&FieldRef>; } // Implement `ExprSchema` for `Arc` @@ -1242,13 +1247,13 @@ impl + std::fmt::Debug> ExprSchema for P { self.as_ref().data_type_and_nullable(col) } - fn field_from_column(&self, col: &Column) -> Result<&Field> { + fn field_from_column(&self, col: &Column) -> Result<&FieldRef> { self.as_ref().field_from_column(col) } } impl ExprSchema for DFSchema { - fn field_from_column(&self, col: &Column) -> Result<&Field> { + fn field_from_column(&self, col: &Column) -> Result<&FieldRef> { match &col.relation { Some(r) => self.field_with_qualified_name(r, &col.name), None => self.field_with_unqualified_name(&col.name), @@ -1433,12 +1438,14 @@ mod tests { join.to_string() ); // test valid access - assert!(join - 
.field_with_qualified_name(&TableReference::bare("t1"), "c0") - .is_ok()); - assert!(join - .field_with_qualified_name(&TableReference::bare("t2"), "c0") - .is_ok()); + assert!( + join.field_with_qualified_name(&TableReference::bare("t1"), "c0") + .is_ok() + ); + assert!( + join.field_with_qualified_name(&TableReference::bare("t2"), "c0") + .is_ok() + ); // test invalid access assert!(join.field_with_unqualified_name("c0").is_err()); assert!(join.field_with_unqualified_name("t1.c0").is_err()); @@ -1480,18 +1487,20 @@ mod tests { join.to_string() ); // test valid access - assert!(join - .field_with_qualified_name(&TableReference::bare("t1"), "c0") - .is_ok()); + assert!( + join.field_with_qualified_name(&TableReference::bare("t1"), "c0") + .is_ok() + ); assert!(join.field_with_unqualified_name("c0").is_ok()); assert!(join.field_with_unqualified_name("c100").is_ok()); assert!(join.field_with_name(None, "c100").is_ok()); // test invalid access assert!(join.field_with_unqualified_name("t1.c0").is_err()); assert!(join.field_with_unqualified_name("t1.c100").is_err()); - assert!(join - .field_with_qualified_name(&TableReference::bare(""), "c100") - .is_err()); + assert!( + join.field_with_qualified_name(&TableReference::bare(""), "c100") + .is_err() + ); Ok(()) } @@ -1500,9 +1509,11 @@ mod tests { let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let right = DFSchema::try_from(test_schema_1())?; let join = left.join(&right); - assert_contains!(join.unwrap_err().to_string(), - "Schema error: Schema contains qualified \ - field name t1.c0 and unqualified field name c0 which would be ambiguous"); + assert_contains!( + join.unwrap_err().to_string(), + "Schema error: Schema contains qualified \ + field name t1.c0 and unqualified field name c0 which would be ambiguous" + ); Ok(()) } @@ -2059,7 +2070,7 @@ mod tests { fn test_print_schema_empty() { let schema = DFSchema::empty(); let output = schema.tree_string(); - insta::assert_snapshot!(output, @r###"root"###); + insta::assert_snapshot!(output, @"root"); } #[test] diff --git a/datafusion/common/src/display/human_readable.rs b/datafusion/common/src/display/human_readable.rs new file mode 100644 index 0000000000000..0e0d677bd8904 --- /dev/null +++ b/datafusion/common/src/display/human_readable.rs @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Helpers for rendering sizes, counts, and durations in human readable form. 
+ +/// Common data size units +pub mod units { + pub const TB: u64 = 1 << 40; + pub const GB: u64 = 1 << 30; + pub const MB: u64 = 1 << 20; + pub const KB: u64 = 1 << 10; +} + +/// Present size in human-readable form +pub fn human_readable_size(size: usize) -> String { + use units::*; + + let size = size as u64; + let (value, unit) = { + if size >= 2 * TB { + (size as f64 / TB as f64, "TB") + } else if size >= 2 * GB { + (size as f64 / GB as f64, "GB") + } else if size >= 2 * MB { + (size as f64 / MB as f64, "MB") + } else if size >= 2 * KB { + (size as f64 / KB as f64, "KB") + } else { + (size as f64, "B") + } + }; + format!("{value:.1} {unit}") +} + +/// Present count in human-readable form with K, M, B, T suffixes +pub fn human_readable_count(count: usize) -> String { + let count = count as u64; + let (value, unit) = { + if count >= 1_000_000_000_000 { + (count as f64 / 1_000_000_000_000.0, " T") + } else if count >= 1_000_000_000 { + (count as f64 / 1_000_000_000.0, " B") + } else if count >= 1_000_000 { + (count as f64 / 1_000_000.0, " M") + } else if count >= 1_000 { + (count as f64 / 1_000.0, " K") + } else { + return count.to_string(); + } + }; + + // Format with appropriate precision + // For values >= 100, show 1 decimal place (e.g., 123.4 K) + // For values < 100, show 2 decimal places (e.g., 10.12 K) + if value >= 100.0 { + format!("{value:.1}{unit}") + } else { + format!("{value:.2}{unit}") + } +} + +/// Present duration in human-readable form with 2 decimal places +pub fn human_readable_duration(nanos: u64) -> String { + const NANOS_PER_SEC: f64 = 1_000_000_000.0; + const NANOS_PER_MILLI: f64 = 1_000_000.0; + const NANOS_PER_MICRO: f64 = 1_000.0; + + let nanos_f64 = nanos as f64; + + if nanos >= 1_000_000_000 { + // >= 1 second: show in seconds + format!("{:.2}s", nanos_f64 / NANOS_PER_SEC) + } else if nanos >= 1_000_000 { + // >= 1 millisecond: show in milliseconds + format!("{:.2}ms", nanos_f64 / NANOS_PER_MILLI) + } else if nanos >= 1_000 { + // >= 1 microsecond: show in microseconds + format!("{:.2}µs", nanos_f64 / NANOS_PER_MICRO) + } else { + // < 1 microsecond: show in nanoseconds + format!("{nanos}ns") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_human_readable_count() { + assert_eq!(human_readable_count(0), "0"); + assert_eq!(human_readable_count(1), "1"); + assert_eq!(human_readable_count(999), "999"); + assert_eq!(human_readable_count(1_000), "1.00 K"); + assert_eq!(human_readable_count(10_100), "10.10 K"); + assert_eq!(human_readable_count(1_532), "1.53 K"); + assert_eq!(human_readable_count(99_999), "100.00 K"); + assert_eq!(human_readable_count(1_000_000), "1.00 M"); + assert_eq!(human_readable_count(1_532_000), "1.53 M"); + assert_eq!(human_readable_count(99_000_000), "99.00 M"); + assert_eq!(human_readable_count(123_456_789), "123.5 M"); + assert_eq!(human_readable_count(1_000_000_000), "1.00 B"); + assert_eq!(human_readable_count(1_532_000_000), "1.53 B"); + assert_eq!(human_readable_count(999_999_999_999), "1000.0 B"); + assert_eq!(human_readable_count(1_000_000_000_000), "1.00 T"); + assert_eq!(human_readable_count(42_000_000_000_000), "42.00 T"); + } + + #[test] + fn test_human_readable_duration() { + assert_eq!(human_readable_duration(0), "0ns"); + assert_eq!(human_readable_duration(1), "1ns"); + assert_eq!(human_readable_duration(999), "999ns"); + assert_eq!(human_readable_duration(1_000), "1.00µs"); + assert_eq!(human_readable_duration(1_234), "1.23µs"); + assert_eq!(human_readable_duration(999_999), "1000.00µs"); + 
assert_eq!(human_readable_duration(1_000_000), "1.00ms"); + assert_eq!(human_readable_duration(11_295_377), "11.30ms"); + assert_eq!(human_readable_duration(1_234_567), "1.23ms"); + assert_eq!(human_readable_duration(999_999_999), "1000.00ms"); + assert_eq!(human_readable_duration(1_000_000_000), "1.00s"); + assert_eq!(human_readable_duration(1_234_567_890), "1.23s"); + assert_eq!(human_readable_duration(42_000_000_000), "42.00s"); + } +} diff --git a/datafusion/common/src/display/mod.rs b/datafusion/common/src/display/mod.rs index bad51c45f8ee8..a6a97b243f06a 100644 --- a/datafusion/common/src/display/mod.rs +++ b/datafusion/common/src/display/mod.rs @@ -18,6 +18,7 @@ //! Types for plan display mod graphviz; +pub mod human_readable; pub use graphviz::*; use std::{ diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index fde52944d0497..4f681896dfc66 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -15,7 +15,25 @@ // specific language governing permissions and limitations // under the License. -//! DataFusion error types +//! # Error Handling in DataFusion +//! +//! In DataFusion, there are two types of errors that can be raised: +//! +//! 1. Expected errors – These indicate invalid operations performed by the caller, +//! such as attempting to open a non-existent file. Different categories exist to +//! distinguish their sources (e.g., [`DataFusionError::ArrowError`], +//! [`DataFusionError::IoError`], etc.). +//! +//! 2. Unexpected errors – Represented by [`DataFusionError::Internal`], these +//! indicate that an internal invariant has been broken, suggesting a potential +//! bug in the system. +//! +//! There are several convenient macros for throwing errors. For example, use +//! `exec_err!` for expected errors. +//! For invariant checks, you can use `assert_or_internal_err!`, +//! `assert_eq_or_internal_err!`, `assert_ne_or_internal_err!` for easier assertions. +//! On the performance-critical path, use `debug_assert!` instead to reduce overhead. + #[cfg(feature = "backtrace")] use std::backtrace::{Backtrace, BacktraceStatus}; @@ -153,6 +171,10 @@ pub enum DataFusionError { /// to multiple receivers. For example, when the source of a repartition /// errors and the error is propagated to multiple consumers. Shared(Arc), + /// An error that originated during a foreign function interface call. + /// Transferring errors across the FFI boundary is difficult, so the original + /// error will be converted to a string. + Ffi(String), } #[macro_export] @@ -395,6 +417,7 @@ impl Error for DataFusionError { // can't be executed. DataFusionError::Collection(errs) => errs.first().map(|e| e as &dyn Error), DataFusionError::Shared(e) => Some(e.as_ref()), + DataFusionError::Ffi(_) => None, } } } @@ -526,6 +549,7 @@ impl DataFusionError { errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix() } DataFusionError::Shared(_) => "", + DataFusionError::Ffi(_) => "FFI error: ", } } @@ -578,6 +602,7 @@ impl DataFusionError { .expect("cannot construct DataFusionError::Collection with 0 errors") .message(), DataFusionError::Shared(ref desc) => Cow::Owned(desc.to_string()), + DataFusionError::Ffi(ref desc) => Cow::Owned(desc.to_string()), } } @@ -750,7 +775,7 @@ impl DataFusionErrorBuilder { macro_rules! 
unwrap_or_internal_err { ($Value: ident) => { $Value.ok_or_else(|| { - DataFusionError::Internal(format!( + $crate::DataFusionError::Internal(format!( "{} should not be None", stringify!($Value) )) @@ -758,6 +783,116 @@ macro_rules! unwrap_or_internal_err { }; } +/// Assert a condition, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_or_internal_err!(predicate); +/// assert_or_internal_err!(predicate, "human readable message"); +/// assert_or_internal_err!(predicate, format!("details: {}", value)); +/// ``` +#[macro_export] +macro_rules! assert_or_internal_err { + ($cond:expr) => { + if !$cond { + return Err($crate::DataFusionError::Internal(format!( + "Assertion failed: {}", + stringify!($cond) + ))); + } + }; + ($cond:expr, $($arg:tt)+) => { + if !$cond { + return Err($crate::DataFusionError::Internal(format!( + "Assertion failed: {}: {}", + stringify!($cond), + format!($($arg)+) + ))); + } + }; +} + +/// Assert equality, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_eq_or_internal_err!(actual, expected); +/// assert_eq_or_internal_err!(left_expr, right_expr, "values must match"); +/// assert_eq_or_internal_err!(lhs, rhs, "metadata: {}", extra); +/// ``` +#[macro_export] +macro_rules! assert_eq_or_internal_err { + ($left:expr, $right:expr $(,)?) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val != right_val { + return Err($crate::DataFusionError::Internal(format!( + "Assertion failed: {} == {} (left: {:?}, right: {:?})", + stringify!($left), + stringify!($right), + left_val, + right_val + ))); + } + }}; + ($left:expr, $right:expr, $($arg:tt)+) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val != right_val { + return Err($crate::DataFusionError::Internal(format!( + "Assertion failed: {} == {} (left: {:?}, right: {:?}): {}", + stringify!($left), + stringify!($right), + left_val, + right_val, + format!($($arg)+) + ))); + } + }}; +} + +/// Assert inequality, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_ne_or_internal_err!(left, right); +/// assert_ne_or_internal_err!(lhs_expr, rhs_expr, "values must differ"); +/// assert_ne_or_internal_err!(a, b, "context {}", info); +/// ``` +#[macro_export] +macro_rules! assert_ne_or_internal_err { + ($left:expr, $right:expr $(,)?) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val == right_val { + return Err($crate::DataFusionError::Internal(format!( + "Assertion failed: {} != {} (left: {:?}, right: {:?})", + stringify!($left), + stringify!($right), + left_val, + right_val + ))); + } + }}; + ($left:expr, $right:expr, $($arg:tt)+) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val == right_val { + return Err($crate::DataFusionError::Internal(format!( + "Assertion failed: {} != {} (left: {:?}, right: {:?}): {}", + stringify!($left), + stringify!($right), + left_val, + right_val, + format!($($arg)+) + ))); + } + }}; +} + /// Add a macros for concise DataFusionError::* errors declaration /// supports placeholders the same way as `format!` /// Examples: @@ -807,14 +942,9 @@ macro_rules! make_error { } - // Note: Certain macros are used in this crate, but not all. 
- // This macro generates a use or all of them in case they are needed - // so we allow unused code to avoid warnings when they are not used #[doc(hidden)] - #[allow(unused)] pub use $NAME_ERR as [<_ $NAME_ERR>]; #[doc(hidden)] - #[allow(unused)] pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>]; } }; @@ -841,11 +971,14 @@ make_error!(substrait_err, substrait_datafusion_err, Substrait); // Exposes a macro to create `DataFusionError::ResourcesExhausted` with optional backtrace make_error!(resources_err, resources_datafusion_err, ResourcesExhausted); +// Exposes a macro to create `DataFusionError::Ffi` with optional backtrace +make_error!(ffi_err, ffi_datafusion_err, Ffi); + // Exposes a macro to create `DataFusionError::SQL` with optional backtrace #[macro_export] macro_rules! sql_datafusion_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ - let err = DataFusionError::SQL(Box::new($ERR), Some(DataFusionError::get_back_trace())); + let err = $crate::DataFusionError::SQL(Box::new($ERR), Some($crate::DataFusionError::get_back_trace())); $( let err = err.with_diagnostic($DIAG); )? @@ -857,7 +990,7 @@ macro_rules! sql_datafusion_err { #[macro_export] macro_rules! sql_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ - let err = datafusion_common::sql_datafusion_err!($ERR); + let err = $crate::sql_datafusion_err!($ERR); $( let err = err.with_diagnostic($DIAG); )? @@ -869,7 +1002,7 @@ macro_rules! sql_err { #[macro_export] macro_rules! arrow_datafusion_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ - let err = DataFusionError::ArrowError(Box::new($ERR), Some(DataFusionError::get_back_trace())); + let err = $crate::DataFusionError::ArrowError(Box::new($ERR), Some($crate::DataFusionError::get_back_trace())); $( let err = err.with_diagnostic($DIAG); )? @@ -882,7 +1015,7 @@ macro_rules! arrow_datafusion_err { macro_rules! arrow_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => { { - let err = datafusion_common::arrow_datafusion_err!($ERR); + let err = $crate::arrow_datafusion_err!($ERR); $( let err = err.with_diagnostic($DIAG); )? @@ -894,9 +1027,9 @@ macro_rules! arrow_err { #[macro_export] macro_rules! schema_datafusion_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ - let err = $crate::error::DataFusionError::SchemaError( + let err = $crate::DataFusionError::SchemaError( Box::new($ERR), - Box::new(Some($crate::error::DataFusionError::get_back_trace())), + Box::new(Some($crate::DataFusionError::get_back_trace())), ); $( let err = err.with_diagnostic($DIAG); @@ -909,9 +1042,9 @@ macro_rules! schema_datafusion_err { #[macro_export] macro_rules! schema_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) 
=> {{ - let err = $crate::error::DataFusionError::SchemaError( + let err = $crate::DataFusionError::SchemaError( Box::new($ERR), - Box::new(Some($crate::error::DataFusionError::get_back_trace())), + Box::new(Some($crate::DataFusionError::get_back_trace())), ); $( let err = err.with_diagnostic($DIAG); @@ -974,6 +1107,115 @@ mod test { use std::sync::Arc; use arrow::error::ArrowError; + use insta::assert_snapshot; + + fn ok_result() -> Result<()> { + Ok(()) + } + + #[test] + fn test_assert_eq_or_internal_err_passes() -> Result<()> { + assert_eq_or_internal_err!(1, 1); + ok_result() + } + + #[test] + fn test_assert_eq_or_internal_err_fails() { + fn check() -> Result<()> { + assert_eq_or_internal_err!(1, 2, "expected equality"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: 1 == 2 (left: 1, right: 2): expected equality. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_ne_or_internal_err_passes() -> Result<()> { + assert_ne_or_internal_err!(1, 2); + ok_result() + } + + #[test] + fn test_assert_ne_or_internal_err_fails() { + fn check() -> Result<()> { + assert_ne_or_internal_err!(3, 3, "values must differ"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: 3 != 3 (left: 3, right: 3): values must differ. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_passes() -> Result<()> { + assert_or_internal_err!(true); + assert_or_internal_err!(true, "message"); + ok_result() + } + + #[test] + fn test_assert_or_internal_err_fails_default() { + fn check() -> Result<()> { + assert_or_internal_err!(false); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_fails_with_message() { + fn check() -> Result<()> { + assert_or_internal_err!(false, "custom message"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false: custom message. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_with_format_arguments() { + fn check() -> Result<()> { + assert_or_internal_err!(false, "custom {}", 42); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false: custom 42. + This issue was likely caused by a bug in DataFusion's code. 
Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } #[test] fn test_error_size() { @@ -986,9 +1228,10 @@ mod test { #[test] fn datafusion_error_to_arrow() { let res = return_arrow_error().unwrap_err(); - assert!(res - .to_string() - .starts_with("External error: Error during planning: foo")); + assert!( + res.to_string() + .starts_with("External error: Error during planning: foo") + ); } #[test] @@ -1000,7 +1243,7 @@ mod test { // To pass the test the environment variable RUST_BACKTRACE should be set to 1 to enforce backtrace #[cfg(feature = "backtrace")] #[test] - #[allow(clippy::unnecessary_literal_unwrap)] + #[expect(clippy::unnecessary_literal_unwrap)] fn test_enabled_backtrace() { match std::env::var("RUST_BACKTRACE") { Ok(val) if val == "1" => {} @@ -1017,17 +1260,17 @@ mod test { .unwrap(), &"Error during planning: Err" ); - assert!(!err - .split(DataFusionError::BACK_TRACE_SEP) - .collect::>() - .get(1) - .unwrap() - .is_empty()); + assert!( + !err.split(DataFusionError::BACK_TRACE_SEP) + .collect::>() + .get(1) + .unwrap() + .is_empty() + ); } #[cfg(not(feature = "backtrace"))] #[test] - #[allow(clippy::unnecessary_literal_unwrap)] fn test_disabled_backtrace() { let res: Result<(), DataFusionError> = plan_err!("Err"); let res = res.unwrap_err().to_string(); @@ -1097,7 +1340,6 @@ mod test { } #[test] - #[allow(clippy::unnecessary_literal_unwrap)] fn test_make_error_parse_input() { let res: Result<(), DataFusionError> = plan_err!("Err"); let res = res.unwrap_err(); @@ -1166,9 +1408,11 @@ mod test { let external_error_2: DataFusionError = generic_error_2.into(); println!("{external_error_2}"); - assert!(external_error_2 - .to_string() - .starts_with("External error: io error")); + assert!( + external_error_2 + .to_string() + .starts_with("External error: io error") + ); } /// Model what happens when implementing SendableRecordBatchStream: diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index 943288af91642..4e6f74a4448af 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -31,6 +31,8 @@ pub struct CsvWriterOptions { /// Compression to apply after ArrowWriter serializes RecordBatches. /// This compression is applied by DataFusion not the ArrowWriter itself. pub compression: CompressionTypeVariant, + /// Compression level for the output file. + pub compression_level: Option, } impl CsvWriterOptions { @@ -41,6 +43,20 @@ impl CsvWriterOptions { Self { writer_options, compression, + compression_level: None, + } + } + + /// Create a new `CsvWriterOptions` with the specified compression level. 
+ pub fn new_with_level( + writer_options: WriterBuilder, + compression: CompressionTypeVariant, + compression_level: u32, + ) -> Self { + Self { + writer_options, + compression, + compression_level: Some(compression_level), } } } @@ -81,6 +97,7 @@ impl TryFrom<&CsvOptions> for CsvWriterOptions { Ok(CsvWriterOptions { writer_options: builder, compression: value.compression, + compression_level: value.compression_level, }) } } diff --git a/datafusion/common/src/file_options/json_writer.rs b/datafusion/common/src/file_options/json_writer.rs index 750d2972329bb..a537192c8128a 100644 --- a/datafusion/common/src/file_options/json_writer.rs +++ b/datafusion/common/src/file_options/json_writer.rs @@ -27,11 +27,26 @@ use crate::{ #[derive(Clone, Debug)] pub struct JsonWriterOptions { pub compression: CompressionTypeVariant, + pub compression_level: Option, } impl JsonWriterOptions { pub fn new(compression: CompressionTypeVariant) -> Self { - Self { compression } + Self { + compression, + compression_level: None, + } + } + + /// Create a new `JsonWriterOptions` with the specified compression and level. + pub fn new_with_level( + compression: CompressionTypeVariant, + compression_level: u32, + ) -> Self { + Self { + compression, + compression_level: Some(compression_level), + } } } @@ -41,6 +56,7 @@ impl TryFrom<&JsonOptions> for JsonWriterOptions { fn try_from(value: &JsonOptions) -> Result { Ok(JsonWriterOptions { compression: value.compression, + compression_level: value.compression_level, }) } } diff --git a/datafusion/common/src/file_options/mod.rs b/datafusion/common/src/file_options/mod.rs index 02667e0165717..c7374949ecef5 100644 --- a/datafusion/common/src/file_options/mod.rs +++ b/datafusion/common/src/file_options/mod.rs @@ -31,10 +31,10 @@ mod tests { use std::collections::HashMap; use crate::{ + Result, config::{ConfigFileType, TableOptions}, file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, parsers::CompressionTypeVariant, - Result, }; use parquet::{ diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 564929c61bab0..8aa0134d09ec8 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -20,22 +20,20 @@ use std::sync::Arc; use crate::{ + _internal_datafusion_err, DataFusionError, Result, config::{ParquetOptions, TableParquetOptions}, - DataFusionError, Result, _internal_datafusion_err, }; use arrow::datatypes::Schema; use parquet::arrow::encode_arrow_schema; -// TODO: handle once deprecated -#[allow(deprecated)] use parquet::{ arrow::ARROW_SCHEMA_META_KEY, basic::{BrotliLevel, GzipLevel, ZstdLevel}, file::{ metadata::KeyValue, properties::{ - EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion, - DEFAULT_STATISTICS_ENABLED, + DEFAULT_STATISTICS_ENABLED, EnabledStatistics, WriterProperties, + WriterPropertiesBuilder, WriterVersion, }, }, schema::types::ColumnPath, @@ -106,7 +104,9 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { if !global.skip_arrow_metadata && !key_value_metadata.contains_key(ARROW_SCHEMA_META_KEY) { - return Err(_internal_datafusion_err!("arrow schema was not added to the kv_metadata, even though it is required by configuration settings")); + return Err(_internal_datafusion_err!( + "arrow schema was not added to the kv_metadata, even though it is required by configuration settings" + )); } // add kv_meta, if any @@ -174,7 +174,6 @@ impl ParquetOptions { 
/// /// Note that this method does not include the key_value_metadata from [`TableParquetOptions`]. pub fn into_writer_properties_builder(&self) -> Result { - #[allow(deprecated)] let ParquetOptions { data_pagesize_limit, write_batch_size, @@ -200,6 +199,7 @@ impl ParquetOptions { metadata_size_hint: _, pushdown_filters: _, reorder_filters: _, + force_filter_selections: _, // not used for writer props allow_single_file_parallelism: _, maximum_parallel_row_group_writers: _, maximum_buffered_record_batches_per_stream: _, @@ -261,7 +261,7 @@ pub(crate) fn parse_encoding_string( "plain" => Ok(parquet::basic::Encoding::PLAIN), "plain_dictionary" => Ok(parquet::basic::Encoding::PLAIN_DICTIONARY), "rle" => Ok(parquet::basic::Encoding::RLE), - #[allow(deprecated)] + #[expect(deprecated)] "bit_packed" => Ok(parquet::basic::Encoding::BIT_PACKED), "delta_binary_packed" => Ok(parquet::basic::Encoding::DELTA_BINARY_PACKED), "delta_length_byte_array" => { @@ -402,14 +402,13 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result ParquetColumnOptions { let bloom_filter_default_props = props.bloom_filter_properties(&col); - #[allow(deprecated)] // max_statistics_size ParquetColumnOptions { bloom_filter_enabled: Some(bloom_filter_default_props.is_some()), encoding: props.encoding(&col).map(|s| s.to_string()), @@ -545,7 +543,6 @@ mod tests { #[cfg(not(feature = "parquet_encryption"))] let fep = None; - #[allow(deprecated)] // max_statistics_size TableParquetOptions { global: ParquetOptions { // global options @@ -577,6 +574,7 @@ mod tests { metadata_size_hint: global_options_defaults.metadata_size_hint, pushdown_filters: global_options_defaults.pushdown_filters, reorder_filters: global_options_defaults.reorder_filters, + force_filter_selections: global_options_defaults.force_filter_selections, allow_single_file_parallelism: global_options_defaults .allow_single_file_parallelism, maximum_parallel_row_group_writers: global_options_defaults @@ -674,8 +672,7 @@ mod tests { let mut default_table_writer_opts = TableParquetOptions::default(); let default_parquet_opts = ParquetOptions::default(); assert_eq!( - default_table_writer_opts.global, - default_parquet_opts, + default_table_writer_opts.global, default_parquet_opts, "should have matching defaults for TableParquetOptions.global and ParquetOptions", ); @@ -699,7 +696,9 @@ mod tests { "should have different created_by sources", ); assert!( - default_writer_props.created_by().starts_with("parquet-rs version"), + default_writer_props + .created_by() + .starts_with("parquet-rs version"), "should indicate that writer_props defaults came from the extern parquet crate", ); assert!( @@ -733,8 +732,7 @@ mod tests { from_extern_parquet.global.skip_arrow_metadata = true; assert_eq!( - default_table_writer_opts, - from_extern_parquet, + default_table_writer_opts, from_extern_parquet, "the default writer_props should have the same configuration as the session's default TableParquetOptions", ); } diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs index 764190e1189bf..a505bd0e1c74e 100644 --- a/datafusion/common/src/format.rs +++ b/datafusion/common/src/format.rs @@ -176,9 +176,9 @@ impl FromStr for ExplainFormat { "tree" => Ok(ExplainFormat::Tree), "pgjson" => Ok(ExplainFormat::PostgresJSON), "graphviz" => Ok(ExplainFormat::Graphviz), - _ => { - Err(DataFusionError::Configuration(format!("Invalid explain format. Expected 'indent', 'tree', 'pgjson' or 'graphviz'. 
Got '{format}'"))) - } + _ => Err(DataFusionError::Configuration(format!( + "Invalid explain format. Expected 'indent', 'tree', 'pgjson' or 'graphviz'. Got '{format}'" + ))), } } } diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index d60189fb6fa3f..98dd1f235aee7 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -28,11 +28,11 @@ use arrow::{downcast_dictionary_array, downcast_primitive_array}; use crate::cast::{ as_binary_view_array, as_boolean_array, as_fixed_size_list_array, as_generic_binary_array, as_large_list_array, as_list_array, as_map_array, - as_string_array, as_string_view_array, as_struct_array, + as_string_array, as_string_view_array, as_struct_array, as_union_array, }; use crate::error::Result; -#[cfg(not(feature = "force_hash_collisions"))] -use crate::error::_internal_err; +use crate::error::{_internal_datafusion_err, _internal_err}; +use std::cell::RefCell; // Combines two hashes into one hash #[inline] @@ -41,6 +41,94 @@ pub fn combine_hashes(l: u64, r: u64) -> u64 { hash.wrapping_mul(37).wrapping_add(r) } +/// Maximum size for the thread-local hash buffer before truncation (4MB = 524,288 u64 elements). +/// The goal of this is to avoid unbounded memory growth that would appear as a memory leak. +/// We allow temporary allocations beyond this size, but after use the buffer is truncated +/// to this size. +const MAX_BUFFER_SIZE: usize = 524_288; + +thread_local! { + /// Thread-local buffer for hash computations to avoid repeated allocations. + /// The buffer is reused across calls and truncated if it exceeds MAX_BUFFER_SIZE. + /// Defaults to a capacity of 8192 u64 elements which is the default batch size. + /// This corresponds to 64KB of memory. + static HASH_BUFFER: RefCell> = const { RefCell::new(Vec::new()) }; +} + +/// Creates hashes for the given arrays using a thread-local buffer, then calls the provided callback +/// with an immutable reference to the computed hashes. +/// +/// This function manages a thread-local buffer to avoid repeated allocations. The buffer is automatically +/// truncated if it exceeds `MAX_BUFFER_SIZE` after use. 
+/// +/// # Arguments +/// * `arrays` - The arrays to hash (must contain at least one array) +/// * `random_state` - The random state for hashing +/// * `callback` - A function that receives an immutable reference to the hash slice and returns a result +/// +/// # Errors +/// Returns an error if: +/// - No arrays are provided +/// - The function is called reentrantly (i.e., the callback invokes `with_hashes` again on the same thread) +/// - The function is called during or after thread destruction +/// +/// # Example +/// ```ignore +/// use datafusion_common::hash_utils::{with_hashes, RandomState}; +/// use arrow::array::{Int32Array, ArrayRef}; +/// use std::sync::Arc; +/// +/// let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3])); +/// let random_state = RandomState::new(); +/// +/// let result = with_hashes([&array], &random_state, |hashes| { +/// // Use the hashes here +/// Ok(hashes.len()) +/// })?; +/// ``` +pub fn with_hashes( + arrays: I, + random_state: &RandomState, + callback: F, +) -> Result +where + I: IntoIterator, + T: AsDynArray, + F: FnOnce(&[u64]) -> Result, +{ + // Peek at the first array to determine buffer size without fully collecting + let mut iter = arrays.into_iter().peekable(); + + // Get the required size from the first array + let required_size = match iter.peek() { + Some(arr) => arr.as_dyn_array().len(), + None => return _internal_err!("with_hashes requires at least one array"), + }; + + HASH_BUFFER.try_with(|cell| { + let mut buffer = cell.try_borrow_mut() + .map_err(|_| _internal_datafusion_err!("with_hashes cannot be called reentrantly on the same thread"))?; + + // Ensure buffer has sufficient length, clearing old values + buffer.clear(); + buffer.resize(required_size, 0); + + // Create hashes in the buffer - this consumes the iterator + create_hashes(iter, random_state, &mut buffer[..required_size])?; + + // Execute the callback with an immutable slice + let result = callback(&buffer[..required_size])?; + + // Cleanup: truncate if buffer grew too large + if buffer.capacity() > MAX_BUFFER_SIZE { + buffer.truncate(MAX_BUFFER_SIZE); + buffer.shrink_to_fit(); + } + + Ok(result) + }).map_err(|_| _internal_datafusion_err!("with_hashes cannot access thread-local storage during or after thread destruction"))? +} + #[cfg(not(feature = "force_hash_collisions"))] fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) { if mul_col { @@ -74,7 +162,7 @@ macro_rules! hash_value { })+ }; } -hash_value!(i8, i16, i32, i64, i128, i256, u8, u16, u32, u64); +hash_value!(i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128); hash_value!(bool, str, [u8], IntervalDayTime, IntervalMonthDayNano); macro_rules! hash_float_value { @@ -181,6 +269,127 @@ fn hash_array( } } +/// Hash a StringView or BytesView array +/// +/// Templated to optimize inner loop based on presence of nulls and external buffers. 
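The `with_hashes` helper above pairs a thread-local scratch buffer with a truncate-after-use cap so per-batch hashing does not reallocate on every call. A minimal, standalone sketch of that reuse-and-truncate pattern (std only; the names are hypothetical and the reentrancy handling is simplified to a panic rather than the error the real helper returns):

```rust
use std::cell::RefCell;

// Same cap as MAX_BUFFER_SIZE above: 524_288 u64s = 4MB.
const MAX_BUFFER_SIZE: usize = 524_288;

thread_local! {
    static SCRATCH: RefCell<Vec<u64>> = const { RefCell::new(Vec::new()) };
}

/// Borrow the thread-local buffer, size it for this call, run the callback on
/// an immutable slice, then shrink the buffer if it grew past the cap.
fn with_scratch<R>(len: usize, f: impl FnOnce(&[u64]) -> R) -> R {
    SCRATCH.with(|cell| {
        let mut buf = cell.borrow_mut(); // panics if called reentrantly
        buf.clear();
        buf.resize(len, 0);
        let result = f(&buf[..len]);
        if buf.capacity() > MAX_BUFFER_SIZE {
            buf.truncate(MAX_BUFFER_SIZE);
            buf.shrink_to_fit();
        }
        result
    })
}

fn main() {
    // Two calls on the same thread reuse one allocation.
    let a = with_scratch(4, |s| s.len());
    let b = with_scratch(8, |s| s.len());
    assert_eq!((a, b), (4, 8));
}
```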
+/// +/// HAS_NULLS: do we have to check null in the inner loop +/// HAS_BUFFERS: if true, array has external buffers; if false, all strings are inlined/ less then 12 bytes +/// REHASH: if true, combining with existing hash, otherwise initializing +#[inline(never)] +fn hash_string_view_array_inner< + T: ByteViewType, + const HAS_NULLS: bool, + const HAS_BUFFERS: bool, + const REHASH: bool, +>( + array: &GenericByteViewArray, + random_state: &RandomState, + hashes_buffer: &mut [u64], +) { + assert_eq!( + hashes_buffer.len(), + array.len(), + "hashes_buffer and array should be of equal length" + ); + + let buffers = array.data_buffers(); + let view_bytes = |view_len: u32, view: u128| { + let view = ByteView::from(view); + let offset = view.offset as usize; + // SAFETY: view is a valid view as it came from the array + unsafe { + let data = buffers.get_unchecked(view.buffer_index as usize); + data.get_unchecked(offset..offset + view_len as usize) + } + }; + + let hashes_and_views = hashes_buffer.iter_mut().zip(array.views().iter()); + for (i, (hash, &v)) in hashes_and_views.enumerate() { + if HAS_NULLS && array.is_null(i) { + continue; + } + let view_len = v as u32; + // all views are inlined, no need to access external buffers + if !HAS_BUFFERS || view_len <= 12 { + if REHASH { + *hash = combine_hashes(v.hash_one(random_state), *hash); + } else { + *hash = v.hash_one(random_state); + } + continue; + } + // view is not inlined, so we need to hash the bytes as well + let value = view_bytes(view_len, v); + if REHASH { + *hash = combine_hashes(value.hash_one(random_state), *hash); + } else { + *hash = value.hash_one(random_state); + } + } +} + +/// Builds hash values for array views and writes them into `hashes_buffer` +/// If `rehash==true` this combines the previous hash value in the buffer +/// with the new hash using `combine_hashes` +#[cfg(not(feature = "force_hash_collisions"))] +fn hash_generic_byte_view_array( + array: &GenericByteViewArray, + random_state: &RandomState, + hashes_buffer: &mut [u64], + rehash: bool, +) { + // instantiate the correct version based on presence of nulls and external buffers + match ( + array.null_count() != 0, + !array.data_buffers().is_empty(), + rehash, + ) { + // no nulls or buffers ==> hash the inlined views directly + // don't call the inner function as Rust seems better able to inline this simpler code (2-3% faster) + (false, false, false) => { + for (hash, &view) in hashes_buffer.iter_mut().zip(array.views().iter()) { + *hash = view.hash_one(random_state); + } + } + (false, false, true) => { + for (hash, &view) in hashes_buffer.iter_mut().zip(array.views().iter()) { + *hash = combine_hashes(view.hash_one(random_state), *hash); + } + } + (false, true, false) => hash_string_view_array_inner::( + array, + random_state, + hashes_buffer, + ), + (false, true, true) => hash_string_view_array_inner::( + array, + random_state, + hashes_buffer, + ), + (true, false, false) => hash_string_view_array_inner::( + array, + random_state, + hashes_buffer, + ), + (true, false, true) => hash_string_view_array_inner::( + array, + random_state, + hashes_buffer, + ), + (true, true, false) => hash_string_view_array_inner::( + array, + random_state, + hashes_buffer, + ), + (true, true, true) => hash_string_view_array_inner::( + array, + random_state, + hashes_buffer, + ), + } +} + /// Helper function to update hash for a dictionary key if the value is valid #[cfg(not(feature = "force_hash_collisions"))] #[inline] @@ -329,6 +538,40 @@ where Ok(()) } +#[cfg(not(feature = 
"force_hash_collisions"))] +fn hash_union_array( + array: &UnionArray, + random_state: &RandomState, + hashes_buffer: &mut [u64], +) -> Result<()> { + use std::collections::HashMap; + + let DataType::Union(union_fields, _mode) = array.data_type() else { + unreachable!() + }; + + let mut child_hashes = HashMap::with_capacity(union_fields.len()); + + for (type_id, _field) in union_fields.iter() { + let child = array.child(type_id); + let mut child_hash_buffer = vec![0; child.len()]; + create_hashes([child], random_state, &mut child_hash_buffer)?; + + child_hashes.insert(type_id, child_hash_buffer); + } + + #[expect(clippy::needless_range_loop)] + for i in 0..array.len() { + let type_id = array.type_id(i); + let child_offset = array.value_offset(i); + + let child_hash = child_hashes.get(&type_id).expect("invalid type_id"); + hashes_buffer[i] = combine_hashes(hashes_buffer[i], child_hash[child_offset]); + } + + Ok(()) +} + #[cfg(not(feature = "force_hash_collisions"))] fn hash_fixed_list_array( array: &FixedSizeListArray, @@ -362,6 +605,76 @@ fn hash_fixed_list_array( Ok(()) } +#[cfg(not(feature = "force_hash_collisions"))] +fn hash_run_array( + array: &RunArray, + random_state: &RandomState, + hashes_buffer: &mut [u64], + rehash: bool, +) -> Result<()> { + // We find the relevant runs that cover potentially sliced arrays, so we can only hash those + // values. Then we find the runs that refer to the original runs and ensure that we apply + // hashes correctly to the sliced, whether sliced at the start, end, or both. + let array_offset = array.offset(); + let array_len = array.len(); + + if array_len == 0 { + return Ok(()); + } + + let run_ends = array.run_ends(); + let run_ends_values = run_ends.values(); + let values = array.values(); + + let start_physical_index = array.get_start_physical_index(); + // get_end_physical_index returns the inclusive last index, but we need the exclusive range end + // for the operations we use below. + let end_physical_index = array.get_end_physical_index() + 1; + + let sliced_values = values.slice( + start_physical_index, + end_physical_index - start_physical_index, + ); + let mut values_hashes = vec![0u64; sliced_values.len()]; + create_hashes( + std::slice::from_ref(&sliced_values), + random_state, + &mut values_hashes, + )?; + + let mut start_in_slice = 0; + for (adjusted_physical_index, &absolute_run_end) in run_ends_values + [start_physical_index..end_physical_index] + .iter() + .enumerate() + { + let is_null_value = sliced_values.is_null(adjusted_physical_index); + let absolute_run_end = absolute_run_end.as_usize(); + + let end_in_slice = (absolute_run_end - array_offset).min(array_len); + + if rehash { + if !is_null_value { + let value_hash = values_hashes[adjusted_physical_index]; + for hash in hashes_buffer + .iter_mut() + .take(end_in_slice) + .skip(start_in_slice) + { + *hash = combine_hashes(value_hash, *hash); + } + } + } else { + let value_hash = values_hashes[adjusted_physical_index]; + hashes_buffer[start_in_slice..end_in_slice].fill(value_hash); + } + + start_in_slice = end_in_slice; + } + + Ok(()) +} + /// Internal helper function that hashes a single array and either initializes or combines /// the hash values in the buffer. 
#[cfg(not(feature = "force_hash_collisions"))] @@ -376,10 +689,10 @@ fn hash_single_array( DataType::Null => hash_null(random_state, hashes_buffer, rehash), DataType::Boolean => hash_array(&as_boolean_array(array)?, random_state, hashes_buffer, rehash), DataType::Utf8 => hash_array(&as_string_array(array)?, random_state, hashes_buffer, rehash), - DataType::Utf8View => hash_array(&as_string_view_array(array)?, random_state, hashes_buffer, rehash), + DataType::Utf8View => hash_generic_byte_view_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash), DataType::LargeUtf8 => hash_array(&as_largestring_array(array), random_state, hashes_buffer, rehash), DataType::Binary => hash_array(&as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), - DataType::BinaryView => hash_array(&as_binary_view_array(array)?, random_state, hashes_buffer, rehash), + DataType::BinaryView => hash_generic_byte_view_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash), DataType::LargeBinary => hash_array(&as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), DataType::FixedSizeBinary(_) => { let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap(); @@ -409,6 +722,14 @@ fn hash_single_array( let array = as_fixed_size_list_array(array)?; hash_fixed_list_array(array, random_state, hashes_buffer)?; } + DataType::Union(_, _) => { + let array = as_union_array(array)?; + hash_union_array(array, random_state, hashes_buffer)?; + } + DataType::RunEndEncoded(_, _) => downcast_run_array! { + array => hash_run_array(array, random_state, hashes_buffer, rehash)?, + _ => unreachable!() + } _ => { // This is internal because we should have caught this before. return _internal_err!( @@ -478,8 +799,8 @@ impl AsDynArray for &ArrayRef { pub fn create_hashes<'a, I, T>( arrays: I, random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> + hashes_buffer: &'a mut [u64], +) -> Result<&'a mut [u64]> where I: IntoIterator, T: AsDynArray, @@ -522,7 +843,7 @@ mod tests { fn create_hashes_for_empty_fixed_size_lit() -> Result<()> { let empty_array = FixedSizeListBuilder::new(StringBuilder::new(), 1).finish(); let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; 0]; + let hashes_buff = &mut [0; 0]; let hashes = create_hashes( &[Arc::new(empty_array) as ArrayRef], &random_state, @@ -567,8 +888,6 @@ mod tests { let binary_array: ArrayRef = Arc::new(binary.iter().cloned().collect::<$ARRAY>()); - let ref_array: ArrayRef = - Arc::new(binary.iter().cloned().collect::()); let random_state = RandomState::with_seeds(0, 0, 0, 0); @@ -576,9 +895,6 @@ mod tests { create_hashes(&[binary_array], &random_state, &mut binary_hashes) .unwrap(); - let mut ref_hashes = vec![0; binary.len()]; - create_hashes(&[ref_array], &random_state, &mut ref_hashes).unwrap(); - // Null values result in a zero hash, for (val, hash) in binary.iter().zip(binary_hashes.iter()) { match val { @@ -587,9 +903,6 @@ mod tests { } } - // same logical values should hash to the same hash value - assert_eq!(binary_hashes, ref_hashes); - // Same values should map to same hash values assert_eq!(binary[0], binary[5]); assert_eq!(binary[4], binary[6]); @@ -601,6 +914,7 @@ mod tests { } create_hash_binary!(binary_array, BinaryArray); + create_hash_binary!(large_binary_array, LargeBinaryArray); create_hash_binary!(binary_view_array, BinaryViewArray); #[test] @@ -677,6 +991,74 @@ mod tests { create_hash_string!(string_view_array, StringArray); 
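The `create_hashes` signature change above (from `&mut Vec<u64>` to `&mut [u64]`) is source-compatible for most callers. A quick sketch of why `Vec`-based call sites keep compiling while stack buffers and sub-slices become usable too (the `fill_hashes` stand-in is hypothetical, not the real function):

```rust
// Same shape as the updated `create_hashes` parameter: borrow a slice, not a Vec.
fn fill_hashes(hashes_buffer: &mut [u64]) -> &mut [u64] {
    for (i, h) in hashes_buffer.iter_mut().enumerate() {
        *h = i as u64; // placeholder for real hashing
    }
    hashes_buffer
}

fn main() {
    // Existing callers that own a Vec keep working via deref coercion...
    let mut owned = vec![0u64; 4];
    fill_hashes(&mut owned);

    // ...and callers can now also pass stack buffers or sub-slices directly.
    let mut stack_buf = [0u64; 4];
    fill_hashes(&mut stack_buf);
    fill_hashes(&mut owned[..2]);

    assert_eq!(owned, vec![0, 1, 2, 3]);
}
```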
create_hash_string!(dict_string_array, DictionaryArray); + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_run_array() -> Result<()> { + let values = Arc::new(Int32Array::from(vec![10, 20, 30])); + let run_ends = Arc::new(Int32Array::from(vec![2, 5, 7])); + let array = Arc::new(RunArray::try_new(&run_ends, values.as_ref()).unwrap()); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; array.len()]; + let hashes = create_hashes( + &[Arc::clone(&array) as ArrayRef], + &random_state, + hashes_buff, + )?; + + assert_eq!(hashes.len(), 7); + assert_eq!(hashes[0], hashes[1]); + assert_eq!(hashes[2], hashes[3]); + assert_eq!(hashes[3], hashes[4]); + assert_eq!(hashes[5], hashes[6]); + assert_ne!(hashes[0], hashes[2]); + assert_ne!(hashes[2], hashes[5]); + assert_ne!(hashes[0], hashes[5]); + + Ok(()) + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn create_multi_column_hash_with_run_array() -> Result<()> { + let int_array = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7])); + let values = Arc::new(StringArray::from(vec!["foo", "bar", "baz"])); + let run_ends = Arc::new(Int32Array::from(vec![2, 5, 7])); + let run_array = Arc::new(RunArray::try_new(&run_ends, values.as_ref()).unwrap()); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut one_col_hashes = vec![0; int_array.len()]; + create_hashes( + &[Arc::clone(&int_array) as ArrayRef], + &random_state, + &mut one_col_hashes, + )?; + + let mut two_col_hashes = vec![0; int_array.len()]; + create_hashes( + &[ + Arc::clone(&int_array) as ArrayRef, + Arc::clone(&run_array) as ArrayRef, + ], + &random_state, + &mut two_col_hashes, + )?; + + assert_eq!(one_col_hashes.len(), 7); + assert_eq!(two_col_hashes.len(), 7); + assert_ne!(one_col_hashes, two_col_hashes); + + let diff_0_vs_1_one_col = one_col_hashes[0] != one_col_hashes[1]; + let diff_0_vs_1_two_col = two_col_hashes[0] != two_col_hashes[1]; + assert_eq!(diff_0_vs_1_one_col, diff_0_vs_1_two_col); + + let diff_2_vs_3_one_col = one_col_hashes[2] != one_col_hashes[3]; + let diff_2_vs_3_two_col = two_col_hashes[2] != two_col_hashes[3]; + assert_eq!(diff_2_vs_3_one_col, diff_2_vs_3_two_col); + + Ok(()) + } + #[test] // Tests actual values of hashes, which are different if forcing collisions #[cfg(not(feature = "force_hash_collisions"))] @@ -1000,4 +1382,297 @@ mod tests { assert_eq!(hashes1, hashes2); } + + #[test] + fn test_with_hashes() { + let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + // Test that with_hashes produces the same results as create_hashes + let mut expected_hashes = vec![0; array.len()]; + create_hashes([&array], &random_state, &mut expected_hashes).unwrap(); + + let result = with_hashes([&array], &random_state, |hashes| { + assert_eq!(hashes.len(), 4); + // Verify hashes match expected values + assert_eq!(hashes, &expected_hashes[..]); + // Return a copy of the hashes + Ok(hashes.to_vec()) + }) + .unwrap(); + + // Verify callback result is returned correctly + assert_eq!(result, expected_hashes); + } + + #[test] + fn test_with_hashes_multi_column() { + let int_array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3])); + let str_array: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"])); + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + // Test multi-column hashing + let mut expected_hashes = vec![0; int_array.len()]; + create_hashes( + [&int_array, &str_array], + 
&random_state, + &mut expected_hashes, + ) + .unwrap(); + + with_hashes([&int_array, &str_array], &random_state, |hashes| { + assert_eq!(hashes.len(), 3); + assert_eq!(hashes, &expected_hashes[..]); + Ok(()) + }) + .unwrap(); + } + + #[test] + fn test_with_hashes_empty_arrays() { + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + // Test that passing no arrays returns an error + let empty: [&ArrayRef; 0] = []; + let result = with_hashes(empty, &random_state, |_hashes| Ok(())); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("requires at least one array") + ); + } + + #[test] + fn test_with_hashes_reentrancy() { + let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3])); + let array2: ArrayRef = Arc::new(Int32Array::from(vec![4, 5, 6])); + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + // Test that reentrant calls return an error instead of panicking + let result = with_hashes([&array], &random_state, |_hashes| { + // Try to call with_hashes again inside the callback + with_hashes([&array2], &random_state, |_inner_hashes| Ok(())) + }); + + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("reentrantly") || err_msg.contains("cannot be called"), + "Error message should mention reentrancy: {err_msg}", + ); + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_sparse_union_arrays() { + // logical array: [int(5), str("foo"), int(10), int(5)] + let int_array = Int32Array::from(vec![Some(5), None, Some(10), Some(5)]); + let str_array = StringArray::from(vec![None, Some("foo"), None, None]); + + let type_ids = vec![0_i8, 1, 0, 0].into(); + let children = vec![ + Arc::new(int_array) as ArrayRef, + Arc::new(str_array) as ArrayRef, + ]; + + let union_fields = [ + (0, Arc::new(Field::new("a", DataType::Int32, true))), + (1, Arc::new(Field::new("b", DataType::Utf8, true))), + ] + .into_iter() + .collect(); + + let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap(); + let array_ref = Arc::new(array) as ArrayRef; + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut hashes = vec![0; array_ref.len()]; + create_hashes(&[array_ref], &random_state, &mut hashes).unwrap(); + + // Rows 0 and 3 both have type_id=0 (int) with value 5 + assert_eq!(hashes[0], hashes[3]); + // Row 0 (int 5) vs Row 2 (int 10) - different values + assert_ne!(hashes[0], hashes[2]); + // Row 0 (int) vs Row 1 (string) - different types + assert_ne!(hashes[0], hashes[1]); + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_sparse_union_arrays_with_nulls() { + // logical array: [int(5), str("foo"), int(null), str(null)] + let int_array = Int32Array::from(vec![Some(5), None, None, None]); + let str_array = StringArray::from(vec![None, Some("foo"), None, None]); + + let type_ids = vec![0, 1, 0, 1].into(); + let children = vec![ + Arc::new(int_array) as ArrayRef, + Arc::new(str_array) as ArrayRef, + ]; + + let union_fields = [ + (0, Arc::new(Field::new("a", DataType::Int32, true))), + (1, Arc::new(Field::new("b", DataType::Utf8, true))), + ] + .into_iter() + .collect(); + + let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap(); + let array_ref = Arc::new(array) as ArrayRef; + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut hashes = vec![0; array_ref.len()]; + create_hashes(&[array_ref], &random_state, &mut hashes).unwrap(); + + // row 2 (int null) and row 3 
(str null) should have the same hash + // because they are both null values + assert_eq!(hashes[2], hashes[3]); + + // row 0 (int 5) vs row 2 (int null) - different (value vs null) + assert_ne!(hashes[0], hashes[2]); + + // row 1 (str "foo") vs row 3 (str null) - different (value vs null) + assert_ne!(hashes[1], hashes[3]); + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_dense_union_arrays() { + // creates a dense union array with int and string types + // [67, "norm", 100, "macdonald", 67] + let int_array = Int32Array::from(vec![67, 100, 67]); + let str_array = StringArray::from(vec!["norm", "macdonald"]); + + let type_ids = vec![0, 1, 0, 1, 0].into(); + let offsets = vec![0, 0, 1, 1, 2].into(); + let children = vec![ + Arc::new(int_array) as ArrayRef, + Arc::new(str_array) as ArrayRef, + ]; + + let union_fields = [ + (0, Arc::new(Field::new("a", DataType::Int32, false))), + (1, Arc::new(Field::new("b", DataType::Utf8, false))), + ] + .into_iter() + .collect(); + + let array = + UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap(); + let array_ref = Arc::new(array) as ArrayRef; + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut hashes = vec![0; array_ref.len()]; + create_hashes(&[array_ref], &random_state, &mut hashes).unwrap(); + + // 67 vs "norm" + assert_ne!(hashes[0], hashes[1]); + // 67 vs 100 + assert_ne!(hashes[0], hashes[2]); + // "norm" vs "macdonald" + assert_ne!(hashes[1], hashes[3]); + // 100 vs "macdonald" + assert_ne!(hashes[2], hashes[3]); + // 67 vs 67 + assert_eq!(hashes[0], hashes[4]); + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_sliced_run_array() -> Result<()> { + let values = Arc::new(Int32Array::from(vec![10, 20, 30])); + let run_ends = Arc::new(Int32Array::from(vec![2, 5, 7])); + let array = Arc::new(RunArray::try_new(&run_ends, values.as_ref()).unwrap()); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut full_hashes = vec![0; array.len()]; + create_hashes( + &[Arc::clone(&array) as ArrayRef], + &random_state, + &mut full_hashes, + )?; + + let array_ref: ArrayRef = Arc::clone(&array) as ArrayRef; + let sliced_array = array_ref.slice(2, 3); + + let mut sliced_hashes = vec![0; sliced_array.len()]; + create_hashes( + std::slice::from_ref(&sliced_array), + &random_state, + &mut sliced_hashes, + )?; + + assert_eq!(sliced_hashes.len(), 3); + assert_eq!(sliced_hashes[0], sliced_hashes[1]); + assert_eq!(sliced_hashes[1], sliced_hashes[2]); + assert_eq!(&sliced_hashes, &full_hashes[2..5]); + + Ok(()) + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn test_run_array_with_nulls() -> Result<()> { + let values = Arc::new(Int32Array::from(vec![Some(10), None, Some(20)])); + let run_ends = Arc::new(Int32Array::from(vec![2, 4, 6])); + let array = Arc::new(RunArray::try_new(&run_ends, values.as_ref()).unwrap()); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut hashes = vec![0; array.len()]; + create_hashes( + &[Arc::clone(&array) as ArrayRef], + &random_state, + &mut hashes, + )?; + + assert_eq!(hashes[0], hashes[1]); + assert_ne!(hashes[0], 0); + assert_eq!(hashes[2], hashes[3]); + assert_eq!(hashes[2], 0); + assert_eq!(hashes[4], hashes[5]); + assert_ne!(hashes[4], 0); + assert_ne!(hashes[0], hashes[4]); + + Ok(()) + } + + #[test] + #[cfg(not(feature = "force_hash_collisions"))] + fn test_run_array_with_nulls_multicolumn() -> Result<()> { + let primitive_array = 
Arc::new(Int32Array::from(vec![Some(10), None, Some(20)])); + let run_values = Arc::new(Int32Array::from(vec![Some(10), None, Some(20)])); + let run_ends = Arc::new(Int32Array::from(vec![1, 2, 3])); + let run_array = + Arc::new(RunArray::try_new(&run_ends, run_values.as_ref()).unwrap()); + let second_col = Arc::new(Int32Array::from(vec![100, 200, 300])); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + let mut primitive_hashes = vec![0; 3]; + create_hashes( + &[ + Arc::clone(&primitive_array) as ArrayRef, + Arc::clone(&second_col) as ArrayRef, + ], + &random_state, + &mut primitive_hashes, + )?; + + let mut run_hashes = vec![0; 3]; + create_hashes( + &[ + Arc::clone(&run_array) as ArrayRef, + Arc::clone(&second_col) as ArrayRef, + ], + &random_state, + &mut run_hashes, + )?; + + assert_eq!(primitive_hashes, run_hashes); + + Ok(()) + } } diff --git a/datafusion/common/src/instant.rs b/datafusion/common/src/instant.rs index 42f21c061c0c2..a5dfb28292581 100644 --- a/datafusion/common/src/instant.rs +++ b/datafusion/common/src/instant.rs @@ -22,7 +22,7 @@ /// under `wasm` feature gate. It provides the same API as [`std::time::Instant`]. pub type Instant = web_time::Instant; -#[allow(clippy::disallowed_types)] +#[expect(clippy::disallowed_types)] #[cfg(not(target_family = "wasm"))] /// DataFusion wrapper around [`std::time::Instant`]. This is only a type alias. pub type Instant = std::time::Instant; diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 549c265024f91..3bec9bd35cbd0 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -23,17 +23,14 @@ // Make sure fast / cheap clones on Arc are explicit: // https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] -// https://github.com/apache/datafusion/issues/18503 -#![deny(clippy::needless_pass_by_value)] #![cfg_attr(test, allow(clippy::needless_pass_by_value))] +#![deny(clippy::allow_attributes)] mod column; mod dfschema; mod functional_dependencies; mod join_type; mod param_value; -#[cfg(feature = "pyarrow")] -mod pyarrow; mod schema_reference; mod table_reference; mod unnest; @@ -69,21 +66,24 @@ pub mod utils; pub use arrow; pub use column::Column; pub use dfschema::{ - qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, + DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, qualified_name, }; pub use diagnostic::Diagnostic; +pub use display::human_readable::{ + human_readable_count, human_readable_duration, human_readable_size, units, +}; pub use error::{ - field_not_found, unqualified_field_not_found, DataFusionError, Result, SchemaError, - SharedResult, + DataFusionError, Result, SchemaError, SharedResult, field_not_found, + unqualified_field_not_found, }; pub use file_options::file_type::{ - GetExt, DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION, - DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, + DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION, + DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, GetExt, }; pub use functional_dependencies::{ + Constraint, Constraints, Dependency, FunctionalDependence, FunctionalDependencies, aggregate_functional_dependencies, get_required_group_by_exprs_indices, - get_target_functional_dependencies, Constraint, Constraints, Dependency, - FunctionalDependence, FunctionalDependencies, + get_target_functional_dependencies, }; use hashbrown::hash_map::DefaultHashBuilder; pub use join_type::{JoinConstraint, JoinSide, JoinType}; @@ -105,9 
+105,9 @@ pub use utils::project_schema; // https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997 #[doc(hidden)] pub use error::{ - _config_datafusion_err, _exec_datafusion_err, _internal_datafusion_err, - _not_impl_datafusion_err, _plan_datafusion_err, _resources_datafusion_err, - _substrait_datafusion_err, + _config_datafusion_err, _exec_datafusion_err, _ffi_datafusion_err, + _internal_datafusion_err, _not_impl_datafusion_err, _plan_datafusion_err, + _resources_datafusion_err, _substrait_datafusion_err, }; // The HashMap and HashSet implementations that should be used as the uniform defaults @@ -139,10 +139,10 @@ macro_rules! downcast_value { // Not public API. #[doc(hidden)] pub mod __private { - use crate::error::_internal_datafusion_err; use crate::Result; + use crate::error::_internal_datafusion_err; use arrow::array::Array; - use std::any::{type_name, Any}; + use std::any::{Any, type_name}; #[doc(hidden)] pub trait DowncastArrayHelper { @@ -193,7 +193,7 @@ mod tests { assert_starts_with( error.to_string(), - "Internal error: could not cast array of type Int32 to arrow_array::array::primitive_array::PrimitiveArray" + "Internal error: could not cast array of type Int32 to arrow_array::array::primitive_array::PrimitiveArray", ); } diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 3a10cc2b42f9f..eb687bde07d0b 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -17,10 +17,10 @@ use std::{collections::BTreeMap, sync::Arc}; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::{DataType, Field, FieldRef}; use hashbrown::HashMap; -use crate::{error::_plan_err, DataFusionError, ScalarValue}; +use crate::{DataFusionError, ScalarValue, error::_plan_err}; /// A [`ScalarValue`] with optional [`FieldMetadata`] #[derive(Debug, Clone)] @@ -320,6 +320,16 @@ impl FieldMetadata { field.with_metadata(self.to_hashmap()) } + + /// Updates the metadata on the FieldRef with this metadata, if it is not empty. + pub fn add_to_field_ref(&self, mut field_ref: FieldRef) -> FieldRef { + if self.inner.is_empty() { + return field_ref; + } + + Arc::make_mut(&mut field_ref).set_metadata(self.to_hashmap()); + field_ref + } } impl From<&Field> for FieldMetadata { diff --git a/datafusion/common/src/nested_struct.rs b/datafusion/common/src/nested_struct.rs index d43816f75b0ed..086d96e85230d 100644 --- a/datafusion/common/src/nested_struct.rs +++ b/datafusion/common/src/nested_struct.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -use crate::error::{Result, _plan_err}; +use crate::error::{_plan_err, Result}; use arrow::{ - array::{new_null_array, Array, ArrayRef, StructArray}, - compute::{cast_with_options, CastOptions}, + array::{Array, ArrayRef, StructArray, new_null_array}, + compute::{CastOptions, cast_with_options}, datatypes::{DataType::Struct, Field, FieldRef}, }; use std::sync::Arc; diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index ebf68e4dd210d..0fac6b529eb0f 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,7 +16,7 @@ // under the License. 
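The new `FieldMetadata::add_to_field_ref` above relies on `Arc::make_mut`, so metadata is written in place when the `FieldRef` is uniquely owned and onto a private copy when it is shared. A standalone sketch of that clone-on-write behaviour (arrow crate only):

```rust
use std::collections::HashMap;
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, FieldRef};

fn main() {
    let meta: HashMap<String, String> =
        [("key".to_string(), "value".to_string())].into();

    // Uniquely owned Arc: `make_mut` mutates the Field in place, no clone.
    let mut unique: FieldRef = Arc::new(Field::new("a", DataType::Int32, true));
    Arc::make_mut(&mut unique).set_metadata(meta.clone());
    assert_eq!(unique.metadata().get("key").map(String::as_str), Some("value"));

    // Shared Arc: `make_mut` clones the Field first, so the other handle
    // keeps its original (empty) metadata.
    let mut shared: FieldRef = Arc::new(Field::new("b", DataType::Int32, true));
    let other = Arc::clone(&shared);
    Arc::make_mut(&mut shared).set_metadata(meta);
    assert!(other.metadata().is_empty());
    assert!(!shared.metadata().is_empty());
}
```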
use crate::error::{_plan_datafusion_err, _plan_err}; -use crate::metadata::{check_metadata_with_storage_equal, ScalarAndMetadata}; +use crate::metadata::{ScalarAndMetadata, check_metadata_with_storage_equal}; use crate::{Result, ScalarValue}; use arrow::datatypes::{DataType, Field, FieldRef}; use std::collections::HashMap; diff --git a/datafusion/common/src/pruning.rs b/datafusion/common/src/pruning.rs index 48750e3c995c4..5a7598ea1f299 100644 --- a/datafusion/common/src/pruning.rs +++ b/datafusion/common/src/pruning.rs @@ -135,6 +135,10 @@ pub trait PruningStatistics { /// This feeds into [`CompositePruningStatistics`] to allow pruning /// with filters that depend both on partition columns and data columns /// (e.g. `WHERE partition_col = data_col`). +#[deprecated( + since = "52.0.0", + note = "This struct is no longer used internally. Use `replace_columns_with_literals` from `datafusion-physical-expr-adapter` to substitute partition column values before pruning. It will be removed in 58.0.0 or 6 months after 52.0.0 is released, whichever comes first." +)] #[derive(Clone)] pub struct PartitionPruningStatistics { /// Values for each column for each container. @@ -156,6 +160,7 @@ pub struct PartitionPruningStatistics { partition_schema: SchemaRef, } +#[expect(deprecated)] impl PartitionPruningStatistics { /// Create a new instance of [`PartitionPruningStatistics`]. /// @@ -169,6 +174,36 @@ impl PartitionPruningStatistics { /// This must **not** be the schema of the entire file or table: /// instead it must only be the schema of the partition columns, /// in the same order as the values in `partition_values`. + /// + /// # Example + /// + /// To create [`PartitionPruningStatistics`] for two partition columns `a` and `b`, + /// for three containers like this: + /// + /// | a | b | + /// | - | - | + /// | 1 | 2 | + /// | 3 | 4 | + /// | 5 | 6 | + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_common::ScalarValue; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::pruning::PartitionPruningStatistics; + /// + /// let partition_values = vec![ + /// vec![ScalarValue::from(1i32), ScalarValue::from(2i32)], + /// vec![ScalarValue::from(3i32), ScalarValue::from(4i32)], + /// vec![ScalarValue::from(5i32), ScalarValue::from(6i32)], + /// ]; + /// let partition_fields = vec![ + /// Arc::new(Field::new("a", DataType::Int32, false)), + /// Arc::new(Field::new("b", DataType::Int32, false)), + /// ]; + /// let partition_stats = + /// PartitionPruningStatistics::try_new(partition_values, partition_fields).unwrap(); + /// ``` pub fn try_new( partition_values: Vec>, partition_fields: Vec, @@ -202,6 +237,7 @@ impl PartitionPruningStatistics { } } +#[expect(deprecated)] impl PruningStatistics for PartitionPruningStatistics { fn min_values(&self, column: &Column) -> Option { let index = self.partition_schema.index_of(column.name()).ok()?; @@ -245,7 +281,7 @@ impl PruningStatistics for PartitionPruningStatistics { match acc { None => Some(Some(eq_result)), Some(acc_array) => { - arrow::compute::kernels::boolean::and(&acc_array, &eq_result) + arrow::compute::kernels::boolean::or_kleene(&acc_array, &eq_result) .map(Some) .ok() } @@ -409,10 +445,15 @@ impl PruningStatistics for PrunableStatistics { /// the first one is returned without any regard for completeness or accuracy. /// That is: if the first statistics has information for a column, even if it is incomplete, /// that is returned even if a later statistics has more complete information. 
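The switch from `and` to `or_kleene` in `PartitionPruningStatistics::contained` above changes how per-value equality results are folded: a container is contained if its single partition value matches *any* candidate value, and a NULL comparison must stay "unknown" rather than suppressing a known `true`. A small illustration of the two kernels (arrow crate only; the interpretation in the comments is mine):

```rust
use arrow::array::BooleanArray;
use arrow::compute::kernels::boolean::{and, or_kleene};

fn main() {
    // Per-container "does the partition value equal candidate X?" results for
    // two candidates; NULL means "unknown" (e.g. a NULL literal in the set).
    let eq_x = BooleanArray::from(vec![Some(true), Some(false), None]);
    let eq_y = BooleanArray::from(vec![Some(false), Some(true), None]);

    // Kleene OR: `true` wins over unknown, so a container matching *any*
    // candidate stays true; only an all-unknown container stays NULL.
    let any_match = or_kleene(&eq_x, &eq_y).unwrap();
    assert_eq!(any_match, BooleanArray::from(vec![Some(true), Some(true), None]));

    // The previous `and` accumulation required *every* candidate to match,
    // which is the wrong question for set containment.
    let all_match = and(&eq_x, &eq_y).unwrap();
    assert_eq!(all_match, BooleanArray::from(vec![Some(false), Some(false), None]));
}
```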
+#[deprecated( + since = "52.0.0", + note = "This struct is no longer used internally. It may be removed in 58.0.0 or 6 months after 52.0.0 is released, whichever comes first. Please open an issue if you have a use case for it." +)] pub struct CompositePruningStatistics { pub statistics: Vec>, } +#[expect(deprecated)] impl CompositePruningStatistics { /// Create a new instance of [`CompositePruningStatistics`] from /// a vector of [`PruningStatistics`]. @@ -427,6 +468,7 @@ impl CompositePruningStatistics { } } +#[expect(deprecated)] impl PruningStatistics for CompositePruningStatistics { fn min_values(&self, column: &Column) -> Option { for stats in &self.statistics { @@ -483,18 +525,25 @@ impl PruningStatistics for CompositePruningStatistics { } #[cfg(test)] +#[expect(deprecated)] mod tests { use crate::{ - cast::{as_int32_array, as_uint64_array}, ColumnStatistics, + cast::{as_int32_array, as_uint64_array}, }; use super::*; use arrow::datatypes::{DataType, Field}; use std::sync::Arc; - #[test] - fn test_partition_pruning_statistics() { + /// return a PartitionPruningStatistics for two columns 'a' and 'b' + /// and the following stats + /// + /// | a | b | + /// | - | - | + /// | 1 | 2 | + /// | 3 | 4 | + fn partition_pruning_statistics_setup() -> PartitionPruningStatistics { let partition_values = vec![ vec![ScalarValue::from(1i32), ScalarValue::from(2i32)], vec![ScalarValue::from(3i32), ScalarValue::from(4i32)], @@ -503,9 +552,12 @@ mod tests { Arc::new(Field::new("a", DataType::Int32, false)), Arc::new(Field::new("b", DataType::Int32, false)), ]; - let partition_stats = - PartitionPruningStatistics::try_new(partition_values, partition_fields) - .unwrap(); + PartitionPruningStatistics::try_new(partition_values, partition_fields).unwrap() + } + + #[test] + fn test_partition_pruning_statistics() { + let partition_stats = partition_pruning_statistics_setup(); let column_a = Column::new_unqualified("a"); let column_b = Column::new_unqualified("b"); @@ -560,6 +612,85 @@ mod tests { assert_eq!(partition_stats.num_containers(), 2); } + #[test] + fn test_partition_pruning_statistics_multiple_positive_values() { + let partition_stats = partition_pruning_statistics_setup(); + + let column_a = Column::new_unqualified("a"); + + // The two containers have `a` values 1 and 3, so they both only contain values from 1 and 3 + let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(3i32)]); + let contained_a = partition_stats.contained(&column_a, &values).unwrap(); + let expected_contained_a = BooleanArray::from(vec![true, true]); + assert_eq!(contained_a, expected_contained_a); + } + + #[test] + fn test_partition_pruning_statistics_multiple_negative_values() { + let partition_stats = partition_pruning_statistics_setup(); + + let column_a = Column::new_unqualified("a"); + + // The two containers have `a` values 1 and 3, + // so the first contains ONLY values from 1,2 + // but the second does not + let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(2i32)]); + let contained_a = partition_stats.contained(&column_a, &values).unwrap(); + let expected_contained_a = BooleanArray::from(vec![true, false]); + assert_eq!(contained_a, expected_contained_a); + } + + #[test] + fn test_partition_pruning_statistics_null_in_values() { + let partition_values = vec![ + vec![ + ScalarValue::from(1i32), + ScalarValue::from(2i32), + ScalarValue::from(3i32), + ], + vec![ + ScalarValue::from(4i32), + ScalarValue::from(5i32), + ScalarValue::from(6i32), + ], + ]; + let partition_fields = vec![ + 
Arc::new(Field::new("a", DataType::Int32, false)), + Arc::new(Field::new("b", DataType::Int32, false)), + Arc::new(Field::new("c", DataType::Int32, false)), + ]; + let partition_stats = + PartitionPruningStatistics::try_new(partition_values, partition_fields) + .unwrap(); + + let column_a = Column::new_unqualified("a"); + let column_b = Column::new_unqualified("b"); + let column_c = Column::new_unqualified("c"); + + let values_a = HashSet::from([ScalarValue::from(1i32), ScalarValue::Int32(None)]); + let contained_a = partition_stats.contained(&column_a, &values_a).unwrap(); + let mut builder = BooleanArray::builder(2); + builder.append_value(true); + builder.append_null(); + let expected_contained_a = builder.finish(); + assert_eq!(contained_a, expected_contained_a); + + // First match creates a NULL boolean array + // The accumulator should update the value to true for the second value + let values_b = HashSet::from([ScalarValue::Int32(None), ScalarValue::from(5i32)]); + let contained_b = partition_stats.contained(&column_b, &values_b).unwrap(); + let mut builder = BooleanArray::builder(2); + builder.append_null(); + builder.append_value(true); + let expected_contained_b = builder.finish(); + assert_eq!(contained_b, expected_contained_b); + + // All matches are null, contained should return None + let values_c = HashSet::from([ScalarValue::Int32(None)]); + let contained_c = partition_stats.contained(&column_c, &values_c); + assert!(contained_c.is_none()); + } + #[test] fn test_partition_pruning_statistics_empty() { let partition_values = vec![]; diff --git a/datafusion/common/src/pyarrow.rs b/datafusion/common/src/pyarrow.rs deleted file mode 100644 index 18c6739735ff7..0000000000000 --- a/datafusion/common/src/pyarrow.rs +++ /dev/null @@ -1,169 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Conversions between PyArrow and DataFusion types - -use arrow::array::{Array, ArrayData}; -use arrow::pyarrow::{FromPyArrow, ToPyArrow}; -use pyo3::exceptions::PyException; -use pyo3::prelude::PyErr; -use pyo3::types::{PyAnyMethods, PyList}; -use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyResult, Python}; - -use crate::{DataFusionError, ScalarValue}; - -impl From for PyErr { - fn from(err: DataFusionError) -> PyErr { - PyException::new_err(err.to_string()) - } -} - -impl FromPyArrow for ScalarValue { - fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> PyResult { - let py = value.py(); - let typ = value.getattr("type")?; - let val = value.call_method0("as_py")?; - - // construct pyarrow array from the python value and pyarrow type - let factory = py.import("pyarrow")?.getattr("array")?; - let args = PyList::new(py, [val])?; - let array = factory.call1((args, typ))?; - - // convert the pyarrow array to rust array using C data interface - let array = arrow::array::make_array(ArrayData::from_pyarrow_bound(&array)?); - let scalar = ScalarValue::try_from_array(&array, 0)?; - - Ok(scalar) - } -} - -impl ToPyArrow for ScalarValue { - fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { - let array = self.to_array()?; - // convert to pyarrow array using C data interface - let pyarray = array.to_data().to_pyarrow(py)?; - let pyscalar = pyarray.call_method1("__getitem__", (0,))?; - - Ok(pyscalar) - } -} - -impl<'source> FromPyObject<'source> for ScalarValue { - fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult { - Self::from_pyarrow_bound(value) - } -} - -impl<'source> IntoPyObject<'source> for ScalarValue { - type Target = PyAny; - - type Output = Bound<'source, Self::Target>; - - type Error = PyErr; - - fn into_pyobject(self, py: Python<'source>) -> Result { - let array = self.to_array()?; - // convert to pyarrow array using C data interface - let pyarray = array.to_data().to_pyarrow(py)?; - pyarray.call_method1("__getitem__", (0,)) - } -} - -#[cfg(test)] -mod tests { - use pyo3::ffi::c_str; - use pyo3::py_run; - use pyo3::types::PyDict; - use pyo3::Python; - - use super::*; - - fn init_python() { - Python::initialize(); - Python::attach(|py| { - if py.run(c_str!("import pyarrow"), None, None).is_err() { - let locals = PyDict::new(py); - py.run( - c_str!( - "import sys; executable = sys.executable; python_path = sys.path" - ), - None, - Some(&locals), - ) - .expect("Couldn't get python info"); - let executable = locals.get_item("executable").unwrap(); - let executable: String = executable.extract().unwrap(); - - let python_path = locals.get_item("python_path").unwrap(); - let python_path: Vec = python_path.extract().unwrap(); - - panic!("pyarrow not found\nExecutable: {executable}\nPython path: {python_path:?}\n\ - HINT: try `pip install pyarrow`\n\ - NOTE: On Mac OS, you must compile against a Framework Python \ - (default in python.org installers and brew, but not pyenv)\n\ - NOTE: On Mac OS, PYO3 might point to incorrect Python library \ - path when using virtual environments. 
Try \ - `export PYTHONPATH=$(python -c \"import sys; print(sys.path[-1])\")`\n") - } - }) - } - - #[test] - fn test_roundtrip() { - init_python(); - - let example_scalars = [ - ScalarValue::Boolean(Some(true)), - ScalarValue::Int32(Some(23)), - ScalarValue::Float64(Some(12.34)), - ScalarValue::from("Hello!"), - ScalarValue::Date32(Some(1234)), - ]; - - Python::attach(|py| { - for scalar in example_scalars.iter() { - let result = - ScalarValue::from_pyarrow_bound(&scalar.to_pyarrow(py).unwrap()) - .unwrap(); - assert_eq!(scalar, &result); - } - }); - } - - #[test] - fn test_py_scalar() -> PyResult<()> { - init_python(); - - Python::attach(|py| -> PyResult<()> { - let scalar_float = ScalarValue::Float64(Some(12.34)); - let py_float = scalar_float - .into_pyobject(py)? - .call_method0("as_py") - .unwrap(); - py_run!(py, py_float, "assert py_float == 12.34"); - - let scalar_string = ScalarValue::Utf8(Some("Hello!".to_string())); - let py_string = scalar_string - .into_pyobject(py)? - .call_method0("as_py") - .unwrap(); - py_run!(py, py_string, "assert py_string == 'Hello!'"); - - Ok(()) - }) - } -} diff --git a/datafusion/common/src/rounding.rs b/datafusion/common/src/rounding.rs index 95eefd3235b5f..1796143d7cf1a 100644 --- a/datafusion/common/src/rounding.rs +++ b/datafusion/common/src/rounding.rs @@ -47,7 +47,7 @@ extern crate libc; any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] -extern "C" { +unsafe extern "C" { fn fesetround(round: i32); fn fegetround() -> i32; } diff --git a/datafusion/common/src/scalar/cache.rs b/datafusion/common/src/scalar/cache.rs index f1476a518774b..5b1ad4e4ede01 100644 --- a/datafusion/common/src/scalar/cache.rs +++ b/datafusion/common/src/scalar/cache.rs @@ -20,10 +20,10 @@ use std::iter::repeat_n; use std::sync::{Arc, LazyLock, Mutex}; -use arrow::array::{new_null_array, Array, ArrayRef, PrimitiveArray}; +use arrow::array::{Array, ArrayRef, PrimitiveArray, new_null_array}; use arrow::datatypes::{ - ArrowDictionaryKeyType, DataType, Int16Type, Int32Type, Int64Type, Int8Type, - UInt16Type, UInt32Type, UInt64Type, UInt8Type, + ArrowDictionaryKeyType, DataType, Int8Type, Int16Type, Int32Type, Int64Type, + UInt8Type, UInt16Type, UInt32Type, UInt64Type, }; /// Maximum number of rows to cache to be conservative on memory usage diff --git a/datafusion/common/src/scalar/consts.rs b/datafusion/common/src/scalar/consts.rs index 8cb446b1c9211..599c2523cd2c7 100644 --- a/datafusion/common/src/scalar/consts.rs +++ b/datafusion/common/src/scalar/consts.rs @@ -17,24 +17,36 @@ // Constants defined for scalar construction. 
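The consts.rs hunk just below adds f16 bounds for π and π/2 as raw bit patterns rather than expressions, since `half::f16` has no const `next_up`. A quick, hedged check of what those bit values decode to (assuming only the `half` crate, which this crate already depends on):

```rust
use half::f16;

fn main() {
    // f16 has 10 mantissa bits, so π rounds to 0x4248 (= 3.140625); the next
    // representable value above π is therefore 0x4249.
    assert!(f16::from_bits(0x4249).to_f64() > std::f64::consts::PI);
    assert!(f16::from_bits(0x4248).to_f64() < std::f64::consts::PI);

    // Same idea for π/2: 0x3E48 is just below, 0x3E49 just above.
    assert!(f16::from_bits(0x3E49).to_f64() > std::f64::consts::FRAC_PI_2);
    assert!(f16::from_bits(0x3E48).to_f64() < std::f64::consts::FRAC_PI_2);

    // The negative bounds only flip the sign bit (0xC249 = -3.142578125 < -π).
    assert!(f16::from_bits(0xC249).to_f64() < -std::f64::consts::PI);
}
```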
+// Next F16 value above π (upper bound) +pub(super) const PI_UPPER_F16: half::f16 = half::f16::from_bits(0x4249); + // Next f32 value above π (upper bound) pub(super) const PI_UPPER_F32: f32 = std::f32::consts::PI.next_up(); // Next f64 value above π (upper bound) pub(super) const PI_UPPER_F64: f64 = std::f64::consts::PI.next_up(); +// Next f16 value below -π (lower bound) +pub(super) const NEGATIVE_PI_LOWER_F16: half::f16 = half::f16::from_bits(0xC249); + // Next f32 value below -π (lower bound) pub(super) const NEGATIVE_PI_LOWER_F32: f32 = (-std::f32::consts::PI).next_down(); // Next f64 value below -π (lower bound) pub(super) const NEGATIVE_PI_LOWER_F64: f64 = (-std::f64::consts::PI).next_down(); +// Next f16 value above π/2 (upper bound) +pub(super) const FRAC_PI_2_UPPER_F16: half::f16 = half::f16::from_bits(0x3E49); + // Next f32 value above π/2 (upper bound) pub(super) const FRAC_PI_2_UPPER_F32: f32 = std::f32::consts::FRAC_PI_2.next_up(); // Next f64 value above π/2 (upper bound) pub(super) const FRAC_PI_2_UPPER_F64: f64 = std::f64::consts::FRAC_PI_2.next_up(); +// Next f32 value below -π/2 (lower bound) +pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F16: half::f16 = half::f16::from_bits(0xBE49); + // Next f32 value below -π/2 (lower bound) pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F32: f32 = (-std::f32::consts::FRAC_PI_2).next_down(); diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index fadd2e41eaba4..e4e048ad3c0d8 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -33,64 +33,162 @@ use std::mem::{size_of, size_of_val}; use std::str::FromStr; use std::sync::Arc; +use crate::assert_or_internal_err; use crate::cast::{ as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array, - as_date64_array, as_decimal128_array, as_decimal256_array, as_decimal32_array, - as_decimal64_array, as_dictionary_array, as_duration_microsecond_array, + as_date64_array, as_decimal32_array, as_decimal64_array, as_decimal128_array, + as_decimal256_array, as_dictionary_array, as_duration_microsecond_array, as_duration_millisecond_array, as_duration_nanosecond_array, as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array, - as_float16_array, as_float32_array, as_float64_array, as_int16_array, as_int32_array, - as_int64_array, as_int8_array, as_interval_dt_array, as_interval_mdn_array, + as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array, + as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array, as_interval_ym_array, as_large_binary_array, as_large_list_array, as_large_string_array, as_string_array, as_string_view_array, as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array, as_timestamp_microsecond_array, as_timestamp_millisecond_array, as_timestamp_nanosecond_array, - as_timestamp_second_array, as_uint16_array, as_uint32_array, as_uint64_array, - as_uint8_array, as_union_array, + as_timestamp_second_array, as_uint8_array, as_uint16_array, as_uint32_array, + as_uint64_array, as_union_array, }; -use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; +use crate::error::{_exec_err, _internal_err, _not_impl_err, DataFusionError, Result}; use crate::format::DEFAULT_CAST_OPTIONS; use crate::hash_utils::create_hashes; use crate::utils::SingleRowListArrayBuilder; use crate::{_internal_datafusion_err, arrow_datafusion_err}; use arrow::array::{ - new_empty_array, 
new_null_array, Array, ArrayData, ArrayRef, ArrowNativeTypeOp, - ArrowPrimitiveType, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, - Date64Array, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, + Array, ArrayData, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, AsArray, + BinaryArray, BinaryViewArray, BinaryViewBuilder, BooleanArray, Date32Array, + Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray, - FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray, - Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, - IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray, - LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait, - PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, UnionArray, + FixedSizeBinaryBuilder, FixedSizeListArray, Float16Array, Float32Array, Float64Array, + GenericListArray, Int8Array, Int16Array, Int32Array, Int64Array, + IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, + LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, + MutableArrayData, OffsetSizeTrait, PrimitiveArray, Scalar, StringArray, + StringViewArray, StringViewBuilder, StructArray, Time32MillisecondArray, + Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray, + new_empty_array, new_null_array, }; use arrow::buffer::{BooleanBuffer, ScalarBuffer}; -use arrow::compute::kernels::cast::{cast_with_options, CastOptions}; +use arrow::compute::kernels::cast::{CastOptions, cast_with_options}; use arrow::compute::kernels::numeric::{ add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping, }; use arrow::datatypes::{ - i256, validate_decimal_precision_and_scale, ArrowDictionaryKeyType, ArrowNativeType, - ArrowTimestampType, DataType, Date32Type, Decimal128Type, Decimal256Type, - Decimal32Type, Decimal64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type, - Int8Type, IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano, - IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, TimeUnit, - TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, - TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, UnionFields, - UnionMode, DECIMAL128_MAX_PRECISION, + ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, Date32Type, + Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, DecimalType, Field, + Float32Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTime, + IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, IntervalUnit, + IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, + TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, + UInt64Type, UnionFields, UnionMode, i256, validate_decimal_precision_and_scale, }; -use arrow::util::display::{array_value_to_string, ArrayFormatter, 
FormatOptions}; +use arrow::util::display::{ArrayFormatter, FormatOptions, array_value_to_string}; use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array}; use chrono::{Duration, NaiveDate}; use half::f16; pub use struct_builder::ScalarStructBuilder; +const SECONDS_PER_DAY: i64 = 86_400; +const MILLIS_PER_DAY: i64 = SECONDS_PER_DAY * 1_000; +const MICROS_PER_DAY: i64 = MILLIS_PER_DAY * 1_000; +const NANOS_PER_DAY: i64 = MICROS_PER_DAY * 1_000; +const MICROS_PER_MILLISECOND: i64 = 1_000; +const NANOS_PER_MILLISECOND: i64 = 1_000_000; + +/// Returns the multiplier that converts the input date representation into the +/// desired timestamp unit, if the conversion requires a multiplication that can +/// overflow an `i64`. +pub fn date_to_timestamp_multiplier( + source_type: &DataType, + target_type: &DataType, +) -> Option { + let DataType::Timestamp(target_unit, _) = target_type else { + return None; + }; + + // Only `Timestamp` target types have a time unit; otherwise no + // multiplier applies (handled above). The function returns `Some(m)` + // when converting the `source_type` to `target_type` requires a + // multiplication that could overflow `i64`. It returns `None` when + // the conversion is a division or otherwise doesn't require a + // multiplication (e.g. Date64 -> Second). + match source_type { + // Date32 stores days since epoch. Converting to any timestamp + // unit requires multiplying by the per-day factor (seconds, + // milliseconds, microseconds, nanoseconds). + DataType::Date32 => Some(match target_unit { + TimeUnit::Second => SECONDS_PER_DAY, + TimeUnit::Millisecond => MILLIS_PER_DAY, + TimeUnit::Microsecond => MICROS_PER_DAY, + TimeUnit::Nanosecond => NANOS_PER_DAY, + }), + + // Date64 stores milliseconds since epoch. Converting to + // seconds is a division (no multiplication), so return `None`. + // Converting to milliseconds is 1:1 (multiplier 1). Converting + // to micro/nano requires multiplying by 1_000 / 1_000_000. + DataType::Date64 => match target_unit { + TimeUnit::Second => None, + // Converting Date64 (ms since epoch) to millisecond timestamps + // is an identity conversion and does not require multiplication. + // Returning `None` indicates no multiplication-based overflow + // check is necessary. + TimeUnit::Millisecond => None, + TimeUnit::Microsecond => Some(MICROS_PER_MILLISECOND), + TimeUnit::Nanosecond => Some(NANOS_PER_MILLISECOND), + }, + + _ => None, + } +} + +/// Ensures the provided value can be represented as a timestamp with the given +/// multiplier. Returns an [`DataFusionError::Execution`] when the converted +/// value would overflow the timestamp range. +pub fn ensure_timestamp_in_bounds( + value: i64, + multiplier: i64, + source_type: &DataType, + target_type: &DataType, +) -> Result<()> { + if multiplier <= 1 { + return Ok(()); + } + + if value.checked_mul(multiplier).is_none() { + let target = format_timestamp_type_for_error(target_type); + _exec_err!( + "Cannot cast {} value {} to {}: converted value exceeds the representable i64 range", + source_type, + value, + target + ) + } else { + Ok(()) + } +} + +/// Format a `DataType::Timestamp` into a short, stable string used in +/// user-facing error messages. 
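The two helpers above exist because widening a date to a fine-grained timestamp is a multiplication that can overflow `i64`. A standalone sketch of the arithmetic behind the `checked_mul` guard (the example day count and the ~year-2262 observation are mine, not from the source):

```rust
const NANOS_PER_DAY: i64 = 86_400_000_000_000;
const MICROS_PER_DAY: i64 = 86_400_000_000;

fn main() {
    // A Date32 value is a day count since the UNIX epoch. Day 200_000 is
    // roughly the year 2517: fine as microseconds, out of range as nanoseconds.
    let days: i64 = 200_000;

    // This is the check `ensure_timestamp_in_bounds` performs: a plain
    // `checked_mul` against the per-unit multiplier.
    assert!(days.checked_mul(MICROS_PER_DAY).is_some()); // representable
    assert!(days.checked_mul(NANOS_PER_DAY).is_none());  // would overflow i64

    // i64::MAX nanoseconds is only ~106_751 days (around the year 2262),
    // which is why the nanosecond target in particular needs the guard.
    assert_eq!(i64::MAX / NANOS_PER_DAY, 106_751);
}
```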
+pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String { + match target_type { + DataType::Timestamp(unit, _) => { + let s = match unit { + TimeUnit::Second => "s", + TimeUnit::Millisecond => "ms", + TimeUnit::Microsecond => "us", + TimeUnit::Nanosecond => "ns", + }; + format!("Timestamp({s})") + } + other => format!("{other}"), + } +} + /// A dynamically typed, nullable single value. /// /// While an arrow [`Array`]) stores one or more values of the same type, in a @@ -622,11 +720,7 @@ impl PartialOrd for ScalarValue { (Union(_, _, _), _) => None, (Dictionary(k1, v1), Dictionary(k2, v2)) => { // Don't compare if the key types don't match (it is effectively a different datatype) - if k1 == k2 { - v1.partial_cmp(v2) - } else { - None - } + if k1 == k2 { v1.partial_cmp(v2) } else { None } } (Dictionary(_, _), _) => None, (Null, Null) => Some(Ordering::Equal), @@ -646,7 +740,9 @@ fn first_array_for_list(arr: &dyn Array) -> ArrayRef { } else if let Some(arr) = arr.as_fixed_size_list_opt() { arr.value(0) } else { - unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen") + unreachable!( + "Since only List / LargeList / FixedSizeList are supported, this should never happen" + ) } } @@ -1055,13 +1151,8 @@ impl ScalarValue { /// Create a decimal Scalar from value/precision and scale. pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result { - // make sure the precision and scale is valid - if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision { - return Ok(ScalarValue::Decimal128(Some(value), precision, scale)); - } - _internal_err!( - "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}" - ) + Self::validate_decimal_or_internal_err::(precision, scale)?; + Ok(ScalarValue::Decimal128(Some(value), precision, scale)) } /// Create a Null instance of ScalarValue for this datatype @@ -1153,7 +1244,7 @@ impl ScalarValue { index_type.clone(), Box::new(value_type.as_ref().try_into()?), ), - // `ScalaValue::List` contains single element `ListArray`. + // `ScalarValue::List` contains single element `ListArray`. DataType::List(field_ref) => ScalarValue::List(Arc::new( GenericListArray::new_null(Arc::clone(field_ref), 1), )), @@ -1161,7 +1252,7 @@ impl ScalarValue { DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new( GenericListArray::new_null(Arc::clone(field_ref), 1), )), - // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`. + // `ScalarValue::FixedSizeList` contains single element `FixedSizeList`. 
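`try_new_decimal128` above now funnels through the shared `validate_decimal_or_internal_err` helper instead of open-coding the precision/scale check. A hedged sketch of the kind of validation involved, using arrow's public `validate_decimal_precision_and_scale` (which the helper presumably wraps; the specific limits shown are Decimal128's):

```rust
use arrow::datatypes::{validate_decimal_precision_and_scale, Decimal128Type};

fn main() {
    // Valid: precision within Decimal128's 38-digit limit, scale <= precision.
    assert!(validate_decimal_precision_and_scale::<Decimal128Type>(10, 2).is_ok());

    // Invalid: precision above 38 digits.
    assert!(validate_decimal_precision_and_scale::<Decimal128Type>(39, 2).is_err());

    // Invalid: positive scale larger than the precision.
    assert!(validate_decimal_precision_and_scale::<Decimal128Type>(5, 7).is_err());
}
```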
DataType::FixedSizeList(field_ref, fixed_length) => { ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null( Arc::clone(field_ref), @@ -1241,6 +1332,7 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing PI pub fn new_pi(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::from(f16::PI)), DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)), DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)), _ => _internal_err!("PI is not supported for data type: {}", datatype), @@ -1250,6 +1342,7 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing PI's upper bound pub fn new_pi_upper(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::Float16(Some(consts::PI_UPPER_F16))), DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)), DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)), _ => { @@ -1261,6 +1354,9 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing -PI's lower bound pub fn new_negative_pi_lower(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => { + Ok(ScalarValue::Float16(Some(consts::NEGATIVE_PI_LOWER_F16))) + } DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)), DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)), _ => { @@ -1272,6 +1368,9 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => { + Ok(ScalarValue::Float16(Some(consts::FRAC_PI_2_UPPER_F16))) + } DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)), DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)), _ => { @@ -1283,6 +1382,9 @@ impl ScalarValue { // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::Float16(Some( + consts::NEGATIVE_FRAC_PI_2_LOWER_F16, + ))), DataType::Float32 => { Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32)) } @@ -1298,6 +1400,7 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing -PI pub fn new_negative_pi(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::from(-f16::PI)), DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)), DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)), _ => _internal_err!("-PI is not supported for data type: {}", datatype), @@ -1307,6 +1410,7 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing PI/2 pub fn new_frac_pi_2(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::from(f16::FRAC_PI_2)), DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)), DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)), _ => _internal_err!("PI/2 is not supported for data type: {}", datatype), @@ -1316,6 +1420,7 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing -PI/2 pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::from(-f16::FRAC_PI_2)), DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)), DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)), _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype), @@ -1325,6 +1430,7 @@ impl ScalarValue { /// Returns a 
[`ScalarValue`] representing infinity pub fn new_infinity(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::from(f16::INFINITY)), DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)), DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)), _ => { @@ -1336,6 +1442,7 @@ impl ScalarValue { /// Returns a [`ScalarValue`] representing negative infinity pub fn new_neg_infinity(datatype: &DataType) -> Result { match datatype { + DataType::Float16 => Ok(ScalarValue::from(f16::NEG_INFINITY)), DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)), DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)), _ => { @@ -1359,7 +1466,7 @@ impl ScalarValue { DataType::UInt16 => ScalarValue::UInt16(Some(0)), DataType::UInt32 => ScalarValue::UInt32(Some(0)), DataType::UInt64 => ScalarValue::UInt64(Some(0)), - DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))), + DataType::Float16 => ScalarValue::Float16(Some(f16::ZERO)), DataType::Float32 => ScalarValue::Float32(Some(0.0)), DataType::Float64 => ScalarValue::Float64(Some(0.0)), DataType::Decimal32(precision, scale) => { @@ -1574,16 +1681,14 @@ impl ScalarValue { DataType::UInt16 => ScalarValue::UInt16(Some(1)), DataType::UInt32 => ScalarValue::UInt32(Some(1)), DataType::UInt64 => ScalarValue::UInt64(Some(1)), - DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))), + DataType::Float16 => ScalarValue::Float16(Some(f16::ONE)), DataType::Float32 => ScalarValue::Float32(Some(1.0)), DataType::Float64 => ScalarValue::Float64(Some(1.0)), DataType::Decimal32(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match 10_i32.checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal32(Some(value), *precision, *scale) @@ -1592,12 +1697,10 @@ impl ScalarValue { } } DataType::Decimal64(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i64::from(10).checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal64(Some(value), *precision, *scale) @@ -1606,12 +1709,10 @@ impl ScalarValue { } } DataType::Decimal128(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i128::from(10).checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal128(Some(value), *precision, *scale) @@ -1620,12 +1721,10 @@ impl ScalarValue { } } DataType::Decimal256(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i256::from(10).checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal256(Some(value), *precision, *scale) @@ -1648,16 +1747,14 @@ impl ScalarValue { DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)), 
DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)), DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)), - DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))), + DataType::Float16 => ScalarValue::Float16(Some(f16::NEG_ONE)), DataType::Float32 => ScalarValue::Float32(Some(-1.0)), DataType::Float64 => ScalarValue::Float64(Some(-1.0)), DataType::Decimal32(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match 10_i32.checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal32(Some(-value), *precision, *scale) @@ -1666,12 +1763,10 @@ impl ScalarValue { } } DataType::Decimal64(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i64::from(10).checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal64(Some(-value), *precision, *scale) @@ -1680,12 +1775,10 @@ impl ScalarValue { } } DataType::Decimal128(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i128::from(10).checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal128(Some(-value), *precision, *scale) @@ -1694,12 +1787,10 @@ impl ScalarValue { } } DataType::Decimal256(precision, scale) => { - validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, )?; - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i256::from(10).checked_pow(*scale as u32) { Some(value) => { ScalarValue::Decimal256(Some(-value), *precision, *scale) @@ -1729,14 +1820,10 @@ impl ScalarValue { DataType::Float32 => ScalarValue::Float32(Some(10.0)), DataType::Float64 => ScalarValue::Float64(Some(10.0)), DataType::Decimal32(precision, scale) => { - if let Err(err) = validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, - ) { - return _internal_err!("Invalid precision and scale {err}"); - } - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + )?; + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match 10_i32.checked_pow((*scale + 1) as u32) { Some(value) => { ScalarValue::Decimal32(Some(value), *precision, *scale) @@ -1745,14 +1832,10 @@ impl ScalarValue { } } DataType::Decimal64(precision, scale) => { - if let Err(err) = validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, - ) { - return _internal_err!("Invalid precision and scale {err}"); - } - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + )?; + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i64::from(10).checked_pow((*scale + 1) as u32) { Some(value) => { ScalarValue::Decimal64(Some(value), *precision, *scale) @@ -1761,14 +1844,10 @@ impl ScalarValue { } } 
DataType::Decimal128(precision, scale) => { - if let Err(err) = validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, - ) { - return _internal_err!("Invalid precision and scale {err}"); - } - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + )?; + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i128::from(10).checked_pow((*scale + 1) as u32) { Some(value) => { ScalarValue::Decimal128(Some(value), *precision, *scale) @@ -1777,14 +1856,10 @@ impl ScalarValue { } } DataType::Decimal256(precision, scale) => { - if let Err(err) = validate_decimal_precision_and_scale::( + Self::validate_decimal_or_internal_err::( *precision, *scale, - ) { - return _internal_err!("Invalid precision and scale {err}"); - } - if *scale < 0 { - return _internal_err!("Negative scale is not supported"); - } + )?; + assert_or_internal_err!(*scale >= 0, "Negative scale is not supported"); match i256::from(10).checked_pow((*scale + 1) as u32) { Some(value) => { ScalarValue::Decimal256(Some(value), *precision, *scale) @@ -1899,9 +1974,7 @@ impl ScalarValue { | ScalarValue::Float16(None) | ScalarValue::Float32(None) | ScalarValue::Float64(None) => Ok(self.clone()), - ScalarValue::Float16(Some(v)) => { - Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32())))) - } + ScalarValue::Float16(Some(v)) => Ok(ScalarValue::Float16(Some(-v))), ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))), ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))), ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))), @@ -2022,6 +2095,7 @@ impl ScalarValue { let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?; Self::try_from_array(r.as_ref(), 0) } + /// Checked addition of `ScalarValue` /// /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code @@ -2293,18 +2367,20 @@ impl ScalarValue { macro_rules! build_array_primitive { ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{ { - let array = scalars.map(|sv| { - if let ScalarValue::$SCALAR_TY(v) = sv { - Ok(v) - } else { - _exec_err!( - "Inconsistent types in ScalarValue::iter_to_array. \ + let array = scalars + .map(|sv| { + if let ScalarValue::$SCALAR_TY(v) = sv { + Ok(v) + } else { + _exec_err!( + "Inconsistent types in ScalarValue::iter_to_array. \ Expected {:?}, got {:?}", - data_type, sv - ) - } - }) - .collect::>()?; + data_type, + sv + ) + } + }) + .collect::>()?; Arc::new(array) } }}; @@ -2313,18 +2389,20 @@ impl ScalarValue { macro_rules! build_array_primitive_tz { ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{ { - let array = scalars.map(|sv| { - if let ScalarValue::$SCALAR_TY(v, _) = sv { - Ok(v) - } else { - _exec_err!( - "Inconsistent types in ScalarValue::iter_to_array. \ + let array = scalars + .map(|sv| { + if let ScalarValue::$SCALAR_TY(v, _) = sv { + Ok(v) + } else { + _exec_err!( + "Inconsistent types in ScalarValue::iter_to_array. \ Expected {:?}, got {:?}", - data_type, sv - ) - } - }) - .collect::>()?; + data_type, + sv + ) + } + }) + .collect::>()?; Arc::new(array.with_timezone_opt($TZ.clone())) } }}; @@ -2335,18 +2413,20 @@ impl ScalarValue { macro_rules! build_array_string { ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{ { - let array = scalars.map(|sv| { - if let ScalarValue::$SCALAR_TY(v) = sv { - Ok(v) - } else { - _exec_err!( - "Inconsistent types in ScalarValue::iter_to_array. 
\ + let array = scalars + .map(|sv| { + if let ScalarValue::$SCALAR_TY(v) = sv { + Ok(v) + } else { + _exec_err!( + "Inconsistent types in ScalarValue::iter_to_array. \ Expected {:?}, got {:?}", - data_type, sv - ) - } - }) - .collect::>()?; + data_type, + sv + ) + } + }) + .collect::>()?; Arc::new(array) } }}; @@ -2648,71 +2728,6 @@ impl ScalarValue { Ok(array) } - fn build_decimal32_array( - value: Option, - precision: u8, - scale: i8, - size: usize, - ) -> Result { - Ok(match value { - Some(val) => Decimal32Array::from(vec![val; size]) - .with_precision_and_scale(precision, scale)?, - None => { - let mut builder = Decimal32Array::builder(size) - .with_precision_and_scale(precision, scale)?; - builder.append_nulls(size); - builder.finish() - } - }) - } - - fn build_decimal64_array( - value: Option, - precision: u8, - scale: i8, - size: usize, - ) -> Result { - Ok(match value { - Some(val) => Decimal64Array::from(vec![val; size]) - .with_precision_and_scale(precision, scale)?, - None => { - let mut builder = Decimal64Array::builder(size) - .with_precision_and_scale(precision, scale)?; - builder.append_nulls(size); - builder.finish() - } - }) - } - - fn build_decimal128_array( - value: Option, - precision: u8, - scale: i8, - size: usize, - ) -> Result { - Ok(match value { - Some(val) => Decimal128Array::from(vec![val; size]) - .with_precision_and_scale(precision, scale)?, - None => { - let mut builder = Decimal128Array::builder(size) - .with_precision_and_scale(precision, scale)?; - builder.append_nulls(size); - builder.finish() - } - }) - } - - fn build_decimal256_array( - value: Option, - precision: u8, - scale: i8, - size: usize, - ) -> Result { - Ok(repeat_n(value, size) - .collect::() - .with_precision_and_scale(precision, scale)?) - } - /// Converts `Vec` where each element has type corresponding to /// `data_type`, to a single element [`ListArray`]. 
/// @@ -2868,18 +2883,35 @@ impl ScalarValue { /// - a `Dictionary` that fails be converted to a dictionary array of size pub fn to_array_of_size(&self, size: usize) -> Result { Ok(match self { - ScalarValue::Decimal32(e, precision, scale) => Arc::new( - ScalarValue::build_decimal32_array(*e, *precision, *scale, size)?, + ScalarValue::Decimal32(Some(e), precision, scale) => Arc::new( + Decimal32Array::from_value(*e, size) + .with_precision_and_scale(*precision, *scale)?, ), - ScalarValue::Decimal64(e, precision, scale) => Arc::new( - ScalarValue::build_decimal64_array(*e, *precision, *scale, size)?, + ScalarValue::Decimal32(None, precision, scale) => { + new_null_array(&DataType::Decimal32(*precision, *scale), size) + } + ScalarValue::Decimal64(Some(e), precision, scale) => Arc::new( + Decimal64Array::from_value(*e, size) + .with_precision_and_scale(*precision, *scale)?, ), - ScalarValue::Decimal128(e, precision, scale) => Arc::new( - ScalarValue::build_decimal128_array(*e, *precision, *scale, size)?, + ScalarValue::Decimal64(None, precision, scale) => { + new_null_array(&DataType::Decimal64(*precision, *scale), size) + } + ScalarValue::Decimal128(Some(e), precision, scale) => Arc::new( + Decimal128Array::from_value(*e, size) + .with_precision_and_scale(*precision, *scale)?, ), - ScalarValue::Decimal256(e, precision, scale) => Arc::new( - ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?, + ScalarValue::Decimal128(None, precision, scale) => { + new_null_array(&DataType::Decimal128(*precision, *scale), size) + } + ScalarValue::Decimal256(Some(e), precision, scale) => Arc::new( + Decimal256Array::from_value(*e, size) + .with_precision_and_scale(*precision, *scale)?, ), + ScalarValue::Decimal256(None, precision, scale) => { + new_null_array(&DataType::Decimal256(*precision, *scale), size) + } + ScalarValue::Boolean(e) => match e { None => new_null_array(&DataType::Boolean, size), Some(true) => { @@ -2952,33 +2984,43 @@ impl ScalarValue { ) } ScalarValue::Utf8(e) => match e { - Some(value) => { - Arc::new(StringArray::from_iter_values(repeat_n(value, size))) - } + Some(value) => Arc::new(StringArray::new_repeated(value, size)), None => new_null_array(&DataType::Utf8, size), }, ScalarValue::Utf8View(e) => match e { Some(value) => { - Arc::new(StringViewArray::from_iter_values(repeat_n(value, size))) + let mut builder = + StringViewBuilder::with_capacity(size).with_deduplicate_strings(); + // Replace with upstream arrow-rs code when available: + // https://github.com/apache/arrow-rs/issues/9034 + for _ in 0..size { + builder.append_value(value); + } + let array = builder.finish(); + Arc::new(array) } None => new_null_array(&DataType::Utf8View, size), }, ScalarValue::LargeUtf8(e) => match e { - Some(value) => { - Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size))) - } + Some(value) => Arc::new(LargeStringArray::new_repeated(value, size)), None => new_null_array(&DataType::LargeUtf8, size), }, ScalarValue::Binary(e) => match e { - Some(value) => Arc::new( - repeat_n(Some(value.as_slice()), size).collect::(), - ), + Some(value) => { + Arc::new(BinaryArray::new_repeated(value.as_slice(), size)) + } None => new_null_array(&DataType::Binary, size), }, ScalarValue::BinaryView(e) => match e { - Some(value) => Arc::new( - repeat_n(Some(value.as_slice()), size).collect::(), - ), + Some(value) => { + let mut builder = + BinaryViewBuilder::with_capacity(size).with_deduplicate_strings(); + for _ in 0..size { + builder.append_value(value); + } + let array = builder.finish(); + 
Arc::new(array) + } None => new_null_array(&DataType::BinaryView, size), }, ScalarValue::FixedSizeBinary(s, e) => match e { @@ -2989,12 +3031,19 @@ impl ScalarValue { ) .unwrap(), ), - None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)), + None => { + // TODO: Replace with FixedSizeBinaryArray::new_null once a fix for + // https://github.com/apache/arrow-rs/issues/8900 is in the used arrow-rs + // version. + let mut builder = FixedSizeBinaryBuilder::new(*s); + builder.append_nulls(size); + Arc::new(builder.finish()) + } }, ScalarValue::LargeBinary(e) => match e { - Some(value) => Arc::new( - repeat_n(Some(value.as_slice()), size).collect::(), - ), + Some(value) => { + Arc::new(LargeBinaryArray::new_repeated(value.as_slice(), size)) + } None => new_null_array(&DataType::LargeBinary, size), }, ScalarValue::List(arr) => { @@ -3153,10 +3202,7 @@ impl ScalarValue { .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?; Arc::new(ar) } - None => { - let dt = self.data_type(); - new_null_array(&dt, size) - } + None => new_null_array(&DataType::Union(fields.clone(), *mode), size), }, ScalarValue::Dictionary(key_type, v) => { // values array is one element long (the value) @@ -3650,11 +3696,26 @@ impl ScalarValue { target_type: &DataType, cast_options: &CastOptions<'static>, ) -> Result { + let source_type = self.data_type(); + if let Some(multiplier) = date_to_timestamp_multiplier(&source_type, target_type) + && let Some(value) = self.date_scalar_value_as_i64() + { + ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?; + } + let scalar_array = self.to_array()?; let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?; ScalarValue::try_from_array(&cast_arr, 0) } + fn date_scalar_value_as_i64(&self) -> Option { + match self { + ScalarValue::Date32(Some(value)) => Some(i64::from(*value)), + ScalarValue::Date64(Some(value)) => Some(*value), + _ => None, + } + } + fn eq_array_decimal32( array: &ArrayRef, index: usize, @@ -4354,6 +4415,20 @@ impl ScalarValue { _ => None, } } + + /// A thin wrapper on Arrow's validation that throws internal error if validation + /// fails. + fn validate_decimal_or_internal_err( + precision: u8, + scale: i8, + ) -> Result<()> { + validate_decimal_precision_and_scale::(precision, scale).map_err(|err| { + _internal_datafusion_err!( + "Decimal precision/scale invariant violated \ + (precision={precision}, scale={scale}): {err}" + ) + }) + } } /// Compacts the data of an `ArrayData` into a new `ArrayData`. 
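The date-to-timestamp bounds check introduced above is user visible: casts that previously wrapped around now fail. A minimal sketch of the resulting behavior, mirroring the `cast_date_to_timestamp_overflow_returns_error` test added further down (illustrative only, not part of the patch; `ScalarValue::cast_to` is the existing public entry point):

use arrow::datatypes::{DataType, TimeUnit};
use datafusion_common::ScalarValue;

fn date_cast_overflow_demo() {
    // i32::MAX days since the epoch cannot be represented as nanoseconds in an
    // i64, so the cast now reports an execution error instead of wrapping.
    let day = ScalarValue::Date32(Some(i32::MAX));
    let err = day
        .cast_to(&DataType::Timestamp(TimeUnit::Nanosecond, None))
        .unwrap_err();
    assert!(
        err.to_string()
            .contains("converted value exceeds the representable i64 range")
    );
}
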
@@ -5008,7 +5083,8 @@ mod tests { use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer}; use arrow::compute::{is_null, kernels}; use arrow::datatypes::{ - ArrowNumericType, Fields, Float64Type, DECIMAL256_MAX_PRECISION, + ArrowNumericType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, Fields, + Float64Type, TimeUnit, }; use arrow::error::ArrowError; use arrow::util::pretty::pretty_format_columns; @@ -5041,6 +5117,52 @@ mod tests { assert_eq!(actual, &expected); } + #[test] + fn test_format_timestamp_type_for_error_and_bounds() { + // format helper + let ts_ns = format_timestamp_type_for_error(&DataType::Timestamp( + TimeUnit::Nanosecond, + None, + )); + assert_eq!(ts_ns, "Timestamp(ns)"); + + let ts_us = format_timestamp_type_for_error(&DataType::Timestamp( + TimeUnit::Microsecond, + None, + )); + assert_eq!(ts_us, "Timestamp(us)"); + + // ensure_timestamp_in_bounds: Date32 non-overflow + let ok = ensure_timestamp_in_bounds( + 1000, + NANOS_PER_DAY, + &DataType::Date32, + &DataType::Timestamp(TimeUnit::Nanosecond, None), + ); + assert!(ok.is_ok()); + + // Date32 overflow -- known large day value (9999-12-31 -> 2932896) + let err = ensure_timestamp_in_bounds( + 2932896, + NANOS_PER_DAY, + &DataType::Date32, + &DataType::Timestamp(TimeUnit::Nanosecond, None), + ); + assert!(err.is_err()); + let msg = err.unwrap_err().to_string(); + assert!(msg.contains("Cannot cast Date32 value 2932896 to Timestamp(ns): converted value exceeds the representable i64 range")); + + // Date64 overflow for ns (millis * 1_000_000) + let overflow_millis: i64 = (i64::MAX / NANOS_PER_MILLISECOND) + 1; + let err2 = ensure_timestamp_in_bounds( + overflow_millis, + NANOS_PER_MILLISECOND, + &DataType::Date64, + &DataType::Timestamp(TimeUnit::Nanosecond, None), + ); + assert!(err2.is_err()); + } + #[test] fn test_scalar_value_from_for_struct() { let boolean = Arc::new(BooleanArray::from(vec![false])); @@ -5172,6 +5294,18 @@ mod tests { assert_eq!(empty_array.len(), 0); } + /// See https://github.com/apache/datafusion/issues/18870 + #[test] + fn test_to_array_of_size_for_none_fsb() { + let sv = ScalarValue::FixedSizeBinary(5, None); + let result = sv + .to_array_of_size(2) + .expect("Failed to convert to array of size"); + assert_eq!(result.len(), 2); + assert_eq!(result.null_count(), 2); + assert_eq!(result.as_fixed_size_binary().values().len(), 10); + } + #[test] fn test_list_to_array_string() { let scalars = vec![ @@ -5527,7 +5661,10 @@ mod tests { .sub_checked(&int_value_2) .unwrap_err() .strip_backtrace(); - assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808") + assert_eq!( + err, + "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808" + ) } #[test] @@ -5675,12 +5812,16 @@ mod tests { assert_eq!(123i128, array_decimal.value(0)); assert_eq!(123i128, array_decimal.value(9)); // test eq array - assert!(decimal_value - .eq_array(&array, 1) - .expect("Failed to compare arrays")); - assert!(decimal_value - .eq_array(&array, 5) - .expect("Failed to compare arrays")); + assert!( + decimal_value + .eq_array(&array, 1) + .expect("Failed to compare arrays") + ); + assert!( + decimal_value + .eq_array(&array, 5) + .expect("Failed to compare arrays") + ); // test try from array assert_eq!( decimal_value, @@ -5725,18 +5866,24 @@ mod tests { assert_eq!(4, array.len()); assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone()); - assert!(ScalarValue::try_new_decimal128(1, 10, 2) - .unwrap() - 
.eq_array(&array, 0) - .expect("Failed to compare arrays")); - assert!(ScalarValue::try_new_decimal128(2, 10, 2) - .unwrap() - .eq_array(&array, 1) - .expect("Failed to compare arrays")); - assert!(ScalarValue::try_new_decimal128(3, 10, 2) - .unwrap() - .eq_array(&array, 2) - .expect("Failed to compare arrays")); + assert!( + ScalarValue::try_new_decimal128(1, 10, 2) + .unwrap() + .eq_array(&array, 0) + .expect("Failed to compare arrays") + ); + assert!( + ScalarValue::try_new_decimal128(2, 10, 2) + .unwrap() + .eq_array(&array, 1) + .expect("Failed to compare arrays") + ); + assert!( + ScalarValue::try_new_decimal128(3, 10, 2) + .unwrap() + .eq_array(&array, 2) + .expect("Failed to compare arrays") + ); assert_eq!( ScalarValue::Decimal128(None, 10, 2), ScalarValue::try_from_array(&array, 3).unwrap() @@ -6172,8 +6319,6 @@ mod tests { } #[test] - // despite clippy claiming they are useless, the code doesn't compile otherwise. - #[allow(clippy::useless_vec)] fn scalar_iter_to_array_boolean() { check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]); check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]); @@ -6223,12 +6368,12 @@ mod tests { check_scalar_iter_binary!( Binary, BinaryArray, - vec![Some(b"foo"), None, Some(b"bar")] + [Some(b"foo"), None, Some(b"bar")] ); check_scalar_iter_binary!( LargeBinary, LargeBinaryArray, - vec![Some(b"foo"), None, Some(b"bar")] + [Some(b"foo"), None, Some(b"bar")] ); } @@ -6681,7 +6826,9 @@ mod tests { for other_index in 0..array.len() { if index != other_index { assert!( - !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"), + !scalar + .eq_array(&array, other_index) + .expect("Failed to compare arrays"), "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}" ); } @@ -7606,7 +7753,6 @@ mod tests { } #[test] - #[allow(arithmetic_overflow)] // we want to test them fn test_scalar_negative_overflows() -> Result<()> { macro_rules! test_overflow_on_value { ($($val:expr),* $(,)?) 
=> {$( @@ -8622,6 +8768,19 @@ mod tests { assert!(dense_scalar.is_null()); } + #[test] + fn cast_date_to_timestamp_overflow_returns_error() { + let scalar = ScalarValue::Date32(Some(i32::MAX)); + let err = scalar + .cast_to(&DataType::Timestamp(TimeUnit::Nanosecond, None)) + .expect_err("expected cast to fail"); + assert!( + err.to_string() + .contains("converted value exceeds the representable i64 range"), + "unexpected error: {err}" + ); + } + #[test] fn null_dictionary_scalar_produces_null_dictionary_array() { let dictionary_scalar = ScalarValue::Dictionary( @@ -9047,6 +9206,27 @@ mod tests { } } + #[test] + fn test_views_minimize_memory() { + let value = "this string is longer than 12 bytes".to_string(); + + let scalar = ScalarValue::Utf8View(Some(value.clone())); + let array = scalar.to_array_of_size(10).unwrap(); + let array = array.as_string_view(); + let buffers = array.data_buffers(); + assert_eq!(1, buffers.len()); + // Ensure we only have a single copy of the value string + assert_eq!(value.len(), buffers[0].len()); + + // Same but for BinaryView + let scalar = ScalarValue::BinaryView(Some(value.bytes().collect())); + let array = scalar.to_array_of_size(10).unwrap(); + let array = array.as_binary_view(); + let buffers = array.data_buffers(); + assert_eq!(1, buffers.len()); + assert_eq!(value.len(), buffers[0].len()); + } + #[test] fn test_convert_array_to_scalar_vec() { // 1: Regular ListArray diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index da298c20ebcb4..ba13ef392d912 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -283,9 +283,13 @@ impl From> for Precision { /// and the transformations output are not always predictable. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Statistics { - /// The number of table rows. + /// The number of rows estimated to be scanned. pub num_rows: Precision, - /// Total bytes of the table rows. + /// The total bytes of the output data. + /// Note that this is not the same as the total bytes that may be scanned, + /// processed, etc. + /// E.g. we may read 1GB of data from a Parquet file but the Arrow data + /// the node produces may be 2GB; it's this 2GB that is tracked here. pub total_byte_size: Precision, /// Statistics on a column level. /// @@ -317,6 +321,31 @@ impl Statistics { } } + /// Calculates `total_byte_size` based on the schema and `num_rows`. + /// If any of the columns has non-primitive width, `total_byte_size` is set to inexact. + pub fn calculate_total_byte_size(&mut self, schema: &Schema) { + let mut row_size = Some(0); + for field in schema.fields() { + match field.data_type().primitive_width() { + Some(width) => { + row_size = row_size.map(|s| s + width); + } + None => { + row_size = None; + break; + } + } + } + match row_size { + None => { + self.total_byte_size = self.total_byte_size.to_inexact(); + } + Some(size) => { + self.total_byte_size = self.num_rows.multiply(&Precision::Exact(size)); + } + } + } + /// Returns an unbounded `ColumnStatistics` for each field in the schema. 
pub fn unknown_column(schema: &Schema) -> Vec { schema @@ -367,7 +396,7 @@ impl Statistics { return self; }; - #[allow(clippy::large_enum_variant)] + #[expect(clippy::large_enum_variant)] enum Slot { /// The column is taken and put into the specified statistics location Taken(usize), @@ -477,15 +506,38 @@ impl Statistics { self.column_statistics = self .column_statistics .into_iter() - .map(ColumnStatistics::to_inexact) + .map(|cs| { + let mut cs = cs.to_inexact(); + // Scale byte_size by the row ratio + cs.byte_size = match cs.byte_size { + Precision::Exact(n) | Precision::Inexact(n) => { + Precision::Inexact((n as f64 * ratio) as usize) + } + Precision::Absent => Precision::Absent, + }; + cs + }) .collect(); - // Adjust the total_byte_size for the ratio of rows before and after, also marking it as inexact - self.total_byte_size = match &self.total_byte_size { - Precision::Exact(n) | Precision::Inexact(n) => { - let adjusted = (*n as f64 * ratio) as usize; - Precision::Inexact(adjusted) + + // Compute total_byte_size as sum of column byte_size values if all are present, + // otherwise fall back to scaling the original total_byte_size + let sum_scan_bytes: Option = self + .column_statistics + .iter() + .map(|cs| cs.byte_size.get_value().copied()) + .try_fold(0usize, |acc, val| val.map(|v| acc + v)); + + self.total_byte_size = match sum_scan_bytes { + Some(sum) => Precision::Inexact(sum), + None => { + // Fall back to scaling original total_byte_size if not all columns have byte_size + match &self.total_byte_size { + Precision::Exact(n) | Precision::Inexact(n) => { + Precision::Inexact((*n as f64 * ratio) as usize) + } + Precision::Absent => Precision::Absent, + } } - Precision::Absent => Precision::Absent, }; Ok(self) } @@ -581,6 +633,7 @@ impl Statistics { col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value); col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value); col_stats.distinct_count = Precision::Absent; + col_stats.byte_size = col_stats.byte_size.add(&item_col_stats.byte_size); } Ok(Statistics { @@ -642,6 +695,11 @@ impl Display for Statistics { } else { s }; + let s = if cs.byte_size != Precision::Absent { + format!("{} ScanBytes={}", s, cs.byte_size) + } else { + s + }; s + ")" }) @@ -671,6 +729,21 @@ pub struct ColumnStatistics { pub sum_value: Precision, /// Number of distinct values pub distinct_count: Precision, + /// Estimated size of this column's data in bytes for the output. + /// + /// Note that this is not the same as the total bytes that may be scanned, + /// processed, etc. + /// + /// E.g. we may read 1GB of data from a Parquet file but the Arrow data + /// the node produces may be 2GB; it's this 2GB that is tracked here. + /// + /// Currently this is accurately calculated for primitive types only. + /// For complex types (like Utf8, List, Struct, etc), this value may be + /// absent or inexact (e.g. estimated from the size of the data in the source Parquet files). + /// + /// This value is automatically scaled when operations like limits or + /// filters reduce the number of rows (see [`Statistics::with_fetch`]). + pub byte_size: Precision, } impl ColumnStatistics { @@ -693,6 +766,7 @@ impl ColumnStatistics { min_value: Precision::Absent, sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Absent, } } @@ -726,6 +800,13 @@ impl ColumnStatistics { self } + /// Set the scan byte size + /// This should initially be set to the total size of the column. 
+ pub fn with_byte_size(mut self, byte_size: Precision) -> Self { + self.byte_size = byte_size; + self + } + /// If the exactness of a [`ColumnStatistics`] instance is lost, this /// function relaxes the exactness of all information by converting them /// [`Precision::Inexact`]. @@ -735,6 +816,7 @@ impl ColumnStatistics { self.min_value = self.min_value.to_inexact(); self.sum_value = self.sum_value.to_inexact(); self.distinct_count = self.distinct_count.to_inexact(); + self.byte_size = self.byte_size.to_inexact(); self } } @@ -961,9 +1043,11 @@ mod tests { Precision::Exact(ScalarValue::Int64(None)), ); // Overflow returns error - assert!(Precision::Exact(ScalarValue::Int32(Some(256))) - .cast_to(&DataType::Int8) - .is_err()); + assert!( + Precision::Exact(ScalarValue::Int32(Some(256))) + .cast_to(&DataType::Int8) + .is_err() + ); } #[test] @@ -976,8 +1060,6 @@ mod tests { // Precision is not copy (requires .clone()) let precision: Precision = Precision::Exact(ScalarValue::Int64(Some(42))); - // Clippy would complain about this if it were Copy - #[allow(clippy::redundant_clone)] let p2 = precision.clone(); assert_eq!(precision, p2); } @@ -1026,6 +1108,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int64(Some(64))), sum_value: Precision::Exact(ScalarValue::Int64(Some(4600))), distinct_count: Precision::Exact(100), + byte_size: Precision::Exact(800), } } @@ -1048,6 +1131,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(1))), sum_value: Precision::Exact(ScalarValue::Int32(Some(500))), distinct_count: Precision::Absent, + byte_size: Precision::Exact(40), }, ColumnStatistics { null_count: Precision::Exact(2), @@ -1055,6 +1139,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(10))), sum_value: Precision::Exact(ScalarValue::Int32(Some(1000))), distinct_count: Precision::Absent, + byte_size: Precision::Exact(40), }, ], }; @@ -1069,6 +1154,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(-10))), sum_value: Precision::Exact(ScalarValue::Int32(Some(600))), distinct_count: Precision::Absent, + byte_size: Precision::Exact(60), }, ColumnStatistics { null_count: Precision::Exact(3), @@ -1076,6 +1162,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(5))), sum_value: Precision::Exact(ScalarValue::Int32(Some(1200))), distinct_count: Precision::Absent, + byte_size: Precision::Exact(60), }, ], }; @@ -1139,6 +1226,7 @@ mod tests { min_value: Precision::Inexact(ScalarValue::Int32(Some(1))), sum_value: Precision::Exact(ScalarValue::Int32(Some(500))), distinct_count: Precision::Absent, + byte_size: Precision::Exact(40), }], }; @@ -1151,6 +1239,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(-10))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Inexact(60), }], }; @@ -1215,7 +1304,10 @@ mod tests { let items = vec![stats1, stats2]; let e = Statistics::try_merge_iter(&items, &schema).unwrap_err(); - assert_contains!(e.to_string(), "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1"); + assert_contains!( + e.to_string(), + "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1" + ); } #[test] @@ -1277,6 +1369,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(0))), sum_value: Precision::Exact(ScalarValue::Int32(Some(5050))), distinct_count: Precision::Exact(50), + byte_size: Precision::Exact(4000), }, ColumnStatistics { null_count: Precision::Exact(20), @@ -1284,6 +1377,7 @@ mod 
tests { min_value: Precision::Exact(ScalarValue::Int64(Some(10))), sum_value: Precision::Exact(ScalarValue::Int64(Some(10100))), distinct_count: Precision::Exact(75), + byte_size: Precision::Exact(8000), }, ], }; @@ -1294,9 +1388,9 @@ mod tests { // Check num_rows assert_eq!(result.num_rows, Precision::Exact(100)); - // Check total_byte_size is scaled proportionally and marked as inexact - // 100/1000 = 0.1, so 8000 * 0.1 = 800 - assert_eq!(result.total_byte_size, Precision::Inexact(800)); + // Check total_byte_size is computed as sum of scaled column byte_size values + // Column 1: 4000 * 0.1 = 400, Column 2: 8000 * 0.1 = 800, Sum = 1200 + assert_eq!(result.total_byte_size, Precision::Inexact(1200)); // Check column statistics are preserved but marked as inexact assert_eq!(result.column_statistics.len(), 2); @@ -1358,6 +1452,7 @@ mod tests { min_value: Precision::Inexact(ScalarValue::Int32(Some(0))), sum_value: Precision::Inexact(ScalarValue::Int32(Some(5050))), distinct_count: Precision::Inexact(50), + byte_size: Precision::Inexact(4000), }], }; @@ -1366,9 +1461,9 @@ mod tests { // Check num_rows is inexact assert_eq!(result.num_rows, Precision::Inexact(500)); - // Check total_byte_size is scaled and inexact - // 500/1000 = 0.5, so 8000 * 0.5 = 4000 - assert_eq!(result.total_byte_size, Precision::Inexact(4000)); + // Check total_byte_size is computed as sum of scaled column byte_size values + // Column 1: 4000 * 0.5 = 2000, Sum = 2000 + assert_eq!(result.total_byte_size, Precision::Inexact(2000)); // Column stats remain inexact assert_eq!( @@ -1425,8 +1520,8 @@ mod tests { .unwrap(); assert_eq!(result.num_rows, Precision::Exact(300)); - // 300/1000 = 0.3, so 8000 * 0.3 = 2400 - assert_eq!(result.total_byte_size, Precision::Inexact(2400)); + // Column 1: byte_size 800 * (300/500) = 240, Sum = 240 + assert_eq!(result.total_byte_size, Precision::Inexact(240)); } #[test] @@ -1442,8 +1537,8 @@ mod tests { let result = original_stats.clone().with_fetch(Some(100), 0, 4).unwrap(); assert_eq!(result.num_rows, Precision::Exact(400)); - // 400/1000 = 0.4, so 8000 * 0.4 = 3200 - assert_eq!(result.total_byte_size, Precision::Inexact(3200)); + // Column 1: byte_size 800 * 0.4 = 320, Sum = 320 + assert_eq!(result.total_byte_size, Precision::Inexact(320)); } #[test] @@ -1458,6 +1553,7 @@ mod tests { min_value: Precision::Absent, sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Absent, }], }; @@ -1496,6 +1592,7 @@ mod tests { min_value: Precision::Exact(ScalarValue::Int32(Some(-100))), sum_value: Precision::Exact(ScalarValue::Int32(Some(123456))), distinct_count: Precision::Exact(789), + byte_size: Precision::Exact(4000), }; let original_stats = Statistics { @@ -1524,4 +1621,140 @@ mod tests { ); assert_eq!(result_col_stats.distinct_count, Precision::Inexact(789)); } + + #[test] + fn test_byte_size_try_merge() { + // Test that byte_size is summed correctly in try_merge + let col_stats1 = ColumnStatistics { + null_count: Precision::Exact(10), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Exact(1000), + }; + let col_stats2 = ColumnStatistics { + null_count: Precision::Exact(20), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Exact(2000), + }; + + let stats1 = Statistics { + num_rows: Precision::Exact(50), + total_byte_size: Precision::Exact(1000), + 
column_statistics: vec![col_stats1], + }; + let stats2 = Statistics { + num_rows: Precision::Exact(100), + total_byte_size: Precision::Exact(2000), + column_statistics: vec![col_stats2], + }; + + let merged = stats1.try_merge(&stats2).unwrap(); + assert_eq!( + merged.column_statistics[0].byte_size, + Precision::Exact(3000) // 1000 + 2000 + ); + } + + #[test] + fn test_byte_size_to_inexact() { + let col_stats = ColumnStatistics { + null_count: Precision::Exact(10), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Exact(5000), + }; + + let inexact = col_stats.to_inexact(); + assert_eq!(inexact.byte_size, Precision::Inexact(5000)); + } + + #[test] + fn test_with_byte_size_builder() { + let col_stats = + ColumnStatistics::new_unknown().with_byte_size(Precision::Exact(8192)); + assert_eq!(col_stats.byte_size, Precision::Exact(8192)); + } + + #[test] + fn test_with_fetch_scales_byte_size() { + // Test that byte_size is scaled by the row ratio in with_fetch + let original_stats = Statistics { + num_rows: Precision::Exact(1000), + total_byte_size: Precision::Exact(8000), + column_statistics: vec![ + ColumnStatistics { + null_count: Precision::Exact(10), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Exact(4000), + }, + ColumnStatistics { + null_count: Precision::Exact(20), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Exact(8000), + }, + ], + }; + + // Apply fetch of 100 rows (10% of original) + let result = original_stats.with_fetch(Some(100), 0, 1).unwrap(); + + // byte_size should be scaled: 4000 * 0.1 = 400, 8000 * 0.1 = 800 + assert_eq!( + result.column_statistics[0].byte_size, + Precision::Inexact(400) + ); + assert_eq!( + result.column_statistics[1].byte_size, + Precision::Inexact(800) + ); + + // total_byte_size should be computed as sum of byte_size values: 400 + 800 = 1200 + assert_eq!(result.total_byte_size, Precision::Inexact(1200)); + } + + #[test] + fn test_with_fetch_total_byte_size_fallback() { + // Test that total_byte_size falls back to scaling when not all columns have byte_size + let original_stats = Statistics { + num_rows: Precision::Exact(1000), + total_byte_size: Precision::Exact(8000), + column_statistics: vec![ + ColumnStatistics { + null_count: Precision::Exact(10), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Exact(4000), + }, + ColumnStatistics { + null_count: Precision::Exact(20), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, // One column has no byte_size + }, + ], + }; + + // Apply fetch of 100 rows (10% of original) + let result = original_stats.with_fetch(Some(100), 0, 1).unwrap(); + + // total_byte_size should fall back to scaling: 8000 * 0.1 = 800 + assert_eq!(result.total_byte_size, Precision::Inexact(800)); + } } diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index c51dea1c4de04..f060704944233 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -735,32 +735,34 @@ mod tests { let non_existing = 
cwd.join("non-existing-dir").display().to_string(); let non_existing_str = non_existing.as_str(); - env::set_var(udf_env, non_existing_str); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_err()); - - env::set_var(udf_env, ""); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - - env::set_var(udf_env, " "); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - - env::set_var(udf_env, existing_str); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - - env::remove_var(udf_env); - let res = get_data_dir(udf_env, non_existing_str); - assert!(res.is_err()); - - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); + unsafe { + env::set_var(udf_env, non_existing_str); + let res = get_data_dir(udf_env, existing_str); + assert!(res.is_err()); + + env::set_var(udf_env, ""); + let res = get_data_dir(udf_env, existing_str); + assert!(res.is_ok()); + assert_eq!(res.unwrap(), existing_pb); + + env::set_var(udf_env, " "); + let res = get_data_dir(udf_env, existing_str); + assert!(res.is_ok()); + assert_eq!(res.unwrap(), existing_pb); + + env::set_var(udf_env, existing_str); + let res = get_data_dir(udf_env, existing_str); + assert!(res.is_ok()); + assert_eq!(res.unwrap(), existing_pb); + + env::remove_var(udf_env); + let res = get_data_dir(udf_env, non_existing_str); + assert!(res.is_err()); + + let res = get_data_dir(udf_env, existing_str); + assert!(res.is_ok()); + assert_eq!(res.unwrap(), existing_pb); + } } #[test] diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 9b36266eec2e9..1e7c02e424256 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -956,12 +956,12 @@ impl<'a, T: 'a, C0: TreeNodeContainer<'a, T>, C1: TreeNodeContainer<'a, T>> } impl< - 'a, - T: 'a, - C0: TreeNodeContainer<'a, T>, - C1: TreeNodeContainer<'a, T>, - C2: TreeNodeContainer<'a, T>, - > TreeNodeContainer<'a, T> for (C0, C1, C2) + 'a, + T: 'a, + C0: TreeNodeContainer<'a, T>, + C1: TreeNodeContainer<'a, T>, + C2: TreeNodeContainer<'a, T>, +> TreeNodeContainer<'a, T> for (C0, C1, C2) { fn apply_elements Result>( &'a self, @@ -992,13 +992,13 @@ impl< } impl< - 'a, - T: 'a, - C0: TreeNodeContainer<'a, T>, - C1: TreeNodeContainer<'a, T>, - C2: TreeNodeContainer<'a, T>, - C3: TreeNodeContainer<'a, T>, - > TreeNodeContainer<'a, T> for (C0, C1, C2, C3) + 'a, + T: 'a, + C0: TreeNodeContainer<'a, T>, + C1: TreeNodeContainer<'a, T>, + C2: TreeNodeContainer<'a, T>, + C3: TreeNodeContainer<'a, T>, +> TreeNodeContainer<'a, T> for (C0, C1, C2, C3) { fn apply_elements Result>( &'a self, @@ -1090,12 +1090,12 @@ impl<'a, T: 'a, C0: TreeNodeContainer<'a, T>, C1: TreeNodeContainer<'a, T>> } impl< - 'a, - T: 'a, - C0: TreeNodeContainer<'a, T>, - C1: TreeNodeContainer<'a, T>, - C2: TreeNodeContainer<'a, T>, - > TreeNodeRefContainer<'a, T> for (&'a C0, &'a C1, &'a C2) + 'a, + T: 'a, + C0: TreeNodeContainer<'a, T>, + C1: TreeNodeContainer<'a, T>, + C2: TreeNodeContainer<'a, T>, +> TreeNodeRefContainer<'a, T> for (&'a C0, &'a C1, &'a C2) { fn apply_ref_elements Result>( &self, @@ -1109,13 +1109,13 @@ impl< } impl< - 'a, - T: 'a, - C0: TreeNodeContainer<'a, T>, - C1: TreeNodeContainer<'a, T>, - C2: TreeNodeContainer<'a, T>, - C3: TreeNodeContainer<'a, T>, - > TreeNodeRefContainer<'a, T> for (&'a C0, &'a C1, &'a 
C2, &'a C3) + 'a, + T: 'a, + C0: TreeNodeContainer<'a, T>, + C1: TreeNodeContainer<'a, T>, + C2: TreeNodeContainer<'a, T>, + C3: TreeNodeContainer<'a, T>, +> TreeNodeRefContainer<'a, T> for (&'a C0, &'a C1, &'a C2, &'a C3) { fn apply_ref_elements Result>( &self, @@ -1336,11 +1336,11 @@ pub(crate) mod tests { use std::collections::HashMap; use std::fmt::Display; + use crate::Result; use crate::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, }; - use crate::Result; #[derive(Debug, Eq, Hash, PartialEq, Clone)] pub struct TestTreeNode { diff --git a/datafusion/common/src/types/builtin.rs b/datafusion/common/src/types/builtin.rs index 314529b99a342..dfd2cc4cf2d8b 100644 --- a/datafusion/common/src/types/builtin.rs +++ b/datafusion/common/src/types/builtin.rs @@ -16,6 +16,7 @@ // under the License. use arrow::datatypes::IntervalUnit::*; +use arrow::datatypes::TimeUnit::*; use crate::types::{LogicalTypeRef, NativeType}; use std::sync::{Arc, LazyLock}; @@ -82,3 +83,17 @@ singleton_variant!( Interval, MonthDayNano ); + +singleton_variant!( + LOGICAL_INTERVAL_YEAR_MONTH, + logical_interval_year_month, + Interval, + YearMonth +); + +singleton_variant!( + LOGICAL_DURATION_MICROSECOND, + logical_duration_microsecond, + Duration, + Microsecond +); diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index a1495b779ac97..766c50441613b 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -19,11 +19,11 @@ use super::{ LogicalField, LogicalFieldRef, LogicalFields, LogicalType, LogicalUnionFields, TypeSignature, }; -use crate::error::{Result, _internal_err}; +use crate::error::{_internal_err, Result}; use arrow::compute::can_cast_types; use arrow::datatypes::{ - DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields, - DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, + DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION, DataType, + Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields, }; use std::{fmt::Display, sync::Arc}; @@ -241,9 +241,7 @@ impl LogicalType for NativeType { (Self::Decimal(p, s), _) => Decimal256(*p, *s), (Self::Timestamp(tu, tz), _) => Timestamp(*tu, tz.clone()), // If given type is Date, return the same type - (Self::Date, origin) if matches!(origin, Date32 | Date64) => { - origin.to_owned() - } + (Self::Date, Date32 | Date64) => origin.to_owned(), (Self::Date, _) => Date32, (Self::Time(tu), _) => match tu { TimeUnit::Second | TimeUnit::Millisecond => Time32(*tu), @@ -253,6 +251,8 @@ impl LogicalType for NativeType { (Self::Interval(iu), _) => Interval(*iu), (Self::Binary, LargeUtf8) => LargeBinary, (Self::Binary, Utf8View) => BinaryView, + // We don't cast to another kind of binary type if the origin one is already a binary type + (Self::Binary, Binary | LargeBinary | BinaryView) => origin.to_owned(), (Self::Binary, data_type) if can_cast_types(data_type, &BinaryView) => { BinaryView } @@ -364,7 +364,7 @@ impl LogicalType for NativeType { "Unavailable default cast for native type {} from physical type {}", self, origin - ) + ); } }) } diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs index a56b940fab666..78ec434d2b577 100644 --- a/datafusion/common/src/utils/memory.rs +++ b/datafusion/common/src/utils/memory.rs @@ -18,8 +18,10 @@ //! 
This module provides a function to estimate the memory size of a HashTable prior to allocation use crate::error::_exec_datafusion_err; -use crate::Result; -use std::mem::size_of; +use crate::{HashSet, Result}; +use arrow::array::ArrayData; +use arrow::record_batch::RecordBatch; +use std::{mem::size_of, ptr::NonNull}; /// Estimates the memory size required for a hash table prior to allocation. /// @@ -99,6 +101,74 @@ pub fn estimate_memory_size(num_elements: usize, fixed_size: usize) -> Result }) } +/// Calculate total used memory of this batch. +/// +/// This function is used to estimate the physical memory usage of the `RecordBatch`. +/// It only counts the memory of large data `Buffer`s, and ignores metadata like +/// types and pointers. +/// The implementation will add up all unique `Buffer`'s memory +/// size, due to: +/// - The data pointer inside `Buffer` are memory regions returned by global memory +/// allocator, those regions can't have overlap. +/// - The actual used range of `ArrayRef`s inside `RecordBatch` can have overlap +/// or reuse the same `Buffer`. For example: taking a slice from `Array`. +/// +/// Example: +/// For a `RecordBatch` with two columns: `col1` and `col2`, two columns are pointing +/// to a sub-region of the same buffer. +/// +/// {xxxxxxxxxxxxxxxxxxx} <--- buffer +/// ^ ^ ^ ^ +/// | | | | +/// col1->{ } | | +/// col2--------->{ } +/// +/// In the above case, `get_record_batch_memory_size` will return the size of +/// the buffer, instead of the sum of `col1` and `col2`'s actual memory size. +/// +/// Note: Current `RecordBatch`.get_array_memory_size()` will double count the +/// buffer memory size if multiple arrays within the batch are sharing the same +/// `Buffer`. This method provides temporary fix until the issue is resolved: +/// +pub fn get_record_batch_memory_size(batch: &RecordBatch) -> usize { + // Store pointers to `Buffer`'s start memory address (instead of actual + // used data region's pointer represented by current `Array`) + let mut counted_buffers: HashSet> = HashSet::new(); + let mut total_size = 0; + + for array in batch.columns() { + let array_data = array.to_data(); + count_array_data_memory_size(&array_data, &mut counted_buffers, &mut total_size); + } + + total_size +} + +/// Count the memory usage of `array_data` and its children recursively. 
+fn count_array_data_memory_size( + array_data: &ArrayData, + counted_buffers: &mut HashSet>, + total_size: &mut usize, +) { + // Count memory usage for `array_data` + for buffer in array_data.buffers() { + if counted_buffers.insert(buffer.data_ptr()) { + *total_size += buffer.capacity(); + } // Otherwise the buffer's memory is already counted + } + + if let Some(null_buffer) = array_data.nulls() + && counted_buffers.insert(null_buffer.inner().inner().data_ptr()) + { + *total_size += null_buffer.inner().inner().capacity(); + } + + // Count all children `ArrayData` recursively + for child in array_data.child_data() { + count_array_data_memory_size(child, counted_buffers, total_size); + } +} + #[cfg(test)] mod tests { use std::{collections::HashSet, mem::size_of}; @@ -132,3 +202,129 @@ mod tests { assert!(estimated.is_err()); } } + +#[cfg(test)] +mod record_batch_tests { + use super::*; + use arrow::array::{Float64Array, Int32Array, ListArray}; + use arrow::datatypes::{DataType, Field, Int32Type, Schema}; + use std::sync::Arc; + + #[test] + fn test_get_record_batch_memory_size() { + let schema = Arc::new(Schema::new(vec![ + Field::new("ints", DataType::Int32, true), + Field::new("float64", DataType::Float64, false), + ])); + + let int_array = + Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4), Some(5)]); + let float64_array = Float64Array::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]); + + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(int_array), Arc::new(float64_array)], + ) + .unwrap(); + + let size = get_record_batch_memory_size(&batch); + assert_eq!(size, 60); + } + + #[test] + fn test_get_record_batch_memory_size_with_null() { + let schema = Arc::new(Schema::new(vec![ + Field::new("ints", DataType::Int32, true), + Field::new("float64", DataType::Float64, false), + ])); + + let int_array = Int32Array::from(vec![None, Some(2), Some(3)]); + let float64_array = Float64Array::from(vec![1.0, 2.0, 3.0]); + + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(int_array), Arc::new(float64_array)], + ) + .unwrap(); + + let size = get_record_batch_memory_size(&batch); + assert_eq!(size, 100); + } + + #[test] + fn test_get_record_batch_memory_size_empty() { + let schema = Arc::new(Schema::new(vec![Field::new( + "ints", + DataType::Int32, + false, + )])); + + let int_array: Int32Array = Int32Array::from(vec![] as Vec); + let batch = RecordBatch::try_new(schema, vec![Arc::new(int_array)]).unwrap(); + + let size = get_record_batch_memory_size(&batch); + assert_eq!(size, 0, "Empty batch should have 0 memory size"); + } + + #[test] + fn test_get_record_batch_memory_size_shared_buffer() { + let original = Int32Array::from(vec![1, 2, 3, 4, 5]); + let slice1 = original.slice(0, 3); + let slice2 = original.slice(2, 3); + + let schema_origin = Arc::new(Schema::new(vec![Field::new( + "origin_col", + DataType::Int32, + false, + )])); + let batch_origin = + RecordBatch::try_new(schema_origin, vec![Arc::new(original)]).unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new("slice1", DataType::Int32, false), + Field::new("slice2", DataType::Int32, false), + ])); + + let batch_sliced = + RecordBatch::try_new(schema, vec![Arc::new(slice1), Arc::new(slice2)]) + .unwrap(); + + let size_origin = get_record_batch_memory_size(&batch_origin); + let size_sliced = get_record_batch_memory_size(&batch_sliced); + + assert_eq!(size_origin, size_sliced); + } + + #[test] + fn test_get_record_batch_memory_size_nested_array() { + let schema = Arc::new(Schema::new(vec![ + Field::new( + "nested_int", 
+ DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), + false, + ), + Field::new( + "nested_int2", + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), + false, + ), + ])); + + let int_list_array = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + ]); + + let int_list_array2 = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(4), Some(5), Some(6)]), + ]); + + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(int_list_array), Arc::new(int_list_array2)], + ) + .unwrap(); + + let size = get_record_batch_memory_size(&batch); + assert_eq!(size, 8208); + } +} diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 7b145ac3ae21d..e061f852637ca 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -22,19 +22,20 @@ pub mod memory; pub mod proxy; pub mod string_utils; -use crate::error::{_exec_datafusion_err, _internal_datafusion_err, _internal_err}; +use crate::assert_or_internal_err; +use crate::error::{_exec_datafusion_err, _internal_datafusion_err}; use crate::{Result, ScalarValue}; use arrow::array::{ - cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray, - OffsetSizeTrait, + Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, + cast::AsArray, }; use arrow::buffer::OffsetBuffer; -use arrow::compute::{partition, SortColumn, SortOptions}; +use arrow::compute::{SortColumn, SortOptions, partition}; use arrow::datatypes::{DataType, Field, SchemaRef}; #[cfg(feature = "sql")] use sqlparser::{ast::Ident, dialect::GenericDialect, parser::Parser}; use std::borrow::{Borrow, Cow}; -use std::cmp::{min, Ordering}; +use std::cmp::{Ordering, min}; use std::collections::HashSet; use std::num::NonZero; use std::ops::Range; @@ -265,10 +266,10 @@ fn needs_quotes(s: &str) -> bool { let mut chars = s.chars(); // first char can not be a number unless escaped - if let Some(first_char) = chars.next() { - if !(first_char.is_ascii_lowercase() || first_char == '_') { - return true; - } + if let Some(first_char) = chars.next() + && !(first_char.is_ascii_lowercase() || first_char == '_') + { + return true; } !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') @@ -519,9 +520,7 @@ pub fn arrays_into_list_array( arr: impl IntoIterator, ) -> Result { let arr = arr.into_iter().collect::>(); - if arr.is_empty() { - return _internal_err!("Cannot wrap empty array into list array"); - } + assert_or_internal_err!(!arr.is_empty(), "Cannot wrap empty array into list array"); let lens = arr.iter().map(|x| x.len()).collect::>(); // Assume data type is consistent @@ -944,8 +943,6 @@ mod tests { use super::*; use crate::ScalarValue::Null; use arrow::array::Float64Array; - use sqlparser::ast::Ident; - use sqlparser::tokenizer::Span; #[test] fn test_bisect_linear_left_and_right() -> Result<()> { @@ -1174,7 +1171,7 @@ mod tests { let expected_parsed = vec![Ident { value: identifier.to_string(), quote_style, - span: Span::empty(), + span: sqlparser::tokenizer::Span::empty(), }]; assert_eq!( diff --git a/datafusion/common/src/utils/proxy.rs b/datafusion/common/src/utils/proxy.rs index fb951aa3b0289..fddf834912544 100644 --- a/datafusion/common/src/utils/proxy.rs +++ b/datafusion/common/src/utils/proxy.rs @@ -15,12 +15,9 @@ // specific language governing permissions and limitations // under the License. -//! [`VecAllocExt`] and [`RawTableAllocExt`] to help tracking of memory allocations +//! 
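On the `arrays_into_list_array` change in the `utils/mod.rs` hunk above: `assert_or_internal_err!` folds the manual `is_empty` check and `_internal_err!` early return into one line. A small sketch of the pattern, assuming the macro is exported from the `datafusion_common` crate root (the function itself is made up for illustration):

```rust
use datafusion_common::{assert_or_internal_err, Result};

/// Returns an internal DataFusion error instead of panicking when the
/// precondition does not hold.
fn head(values: &[i32]) -> Result<i32> {
    assert_or_internal_err!(!values.is_empty(), "Cannot take the head of an empty slice");
    Ok(values[0])
}

fn main() {
    assert_eq!(head(&[1, 2, 3]).unwrap(), 1);
    assert!(head(&[]).is_err());
}
```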
[`VecAllocExt`] to help tracking of memory allocations -use hashbrown::{ - hash_table::HashTable, - raw::{Bucket, RawTable}, -}; +use hashbrown::hash_table::HashTable; use std::mem::size_of; /// Extension trait for [`Vec`] to account for allocations. @@ -114,75 +111,6 @@ impl VecAllocExt for Vec { } } -/// Extension trait for hash browns [`RawTable`] to account for allocations. -pub trait RawTableAllocExt { - /// Item type. - type T; - - /// [Insert](RawTable::insert) new element into table and increase - /// `accounting` by any newly allocated bytes. - /// - /// Returns the bucket where the element was inserted. - /// Note that allocation counts capacity, not size. - /// - /// # Example: - /// ``` - /// # use datafusion_common::utils::proxy::RawTableAllocExt; - /// # use hashbrown::raw::RawTable; - /// let mut table = RawTable::new(); - /// let mut allocated = 0; - /// let hash_fn = |x: &u32| (*x as u64) % 1000; - /// // pretend 0x3117 is the hash value for 1 - /// table.insert_accounted(1, hash_fn, &mut allocated); - /// assert_eq!(allocated, 64); - /// - /// // insert more values - /// for i in 0..100 { - /// table.insert_accounted(i, hash_fn, &mut allocated); - /// } - /// assert_eq!(allocated, 400); - /// ``` - fn insert_accounted( - &mut self, - x: Self::T, - hasher: impl Fn(&Self::T) -> u64, - accounting: &mut usize, - ) -> Bucket; -} - -impl RawTableAllocExt for RawTable { - type T = T; - - fn insert_accounted( - &mut self, - x: Self::T, - hasher: impl Fn(&Self::T) -> u64, - accounting: &mut usize, - ) -> Bucket { - let hash = hasher(&x); - - match self.try_insert_no_grow(hash, x) { - Ok(bucket) => bucket, - Err(x) => { - // need to request more memory - - let bump_elements = self.capacity().max(16); - let bump_size = bump_elements * size_of::(); - *accounting = (*accounting).checked_add(bump_size).expect("overflow"); - - self.reserve(bump_elements, hasher); - - // still need to insert the element since first try failed - // Note: cannot use `.expect` here because `T` may not implement `Debug` - match self.try_insert_no_grow(hash, x) { - Ok(bucket) => bucket, - Err(_) => panic!("just grew the container"), - } - } - } - } -} - /// Extension trait for hash browns [`HashTable`] to account for allocations. pub trait HashTableAllocExt { /// Item type. 
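With `RawTableAllocExt` removed, capacity-based accounting is left to `HashTableAllocExt`, whose declaration continues below. A rough usage sketch, assuming its `insert_accounted` keeps the same hasher-and-accounting arguments as the removed `RawTable` version:

```rust
use datafusion_common::utils::proxy::HashTableAllocExt;
use hashbrown::hash_table::HashTable;

fn main() {
    let mut table: HashTable<u32> = HashTable::new();
    let mut allocated = 0usize;
    let hash_fn = |x: &u32| u64::from(*x);

    for i in 0..100u32 {
        // Grows `allocated` by the bytes newly reserved by the table,
        // based on capacity rather than element count.
        table.insert_accounted(i, hash_fn, &mut allocated);
    }

    println!("len = {}, accounted bytes = {}", table.len(), allocated);
}
```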
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 67a73ac6f6693..bd88ed3b9ca1e 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -46,7 +46,7 @@ array_expressions = ["nested_expressions"] avro = ["datafusion-common/avro", "datafusion-datasource-avro"] backtrace = ["datafusion-common/backtrace"] compression = [ - "xz2", + "liblzma", "bzip2", "flate2", "zstd", @@ -79,7 +79,6 @@ parquet_encryption = [ "datafusion-common/parquet_encryption", "datafusion-datasource-parquet/parquet_encryption", ] -pyarrow = ["datafusion-common/pyarrow", "parquet"] regex_expressions = [ "datafusion-functions/regex_expressions", ] @@ -88,6 +87,7 @@ recursive_protection = [ "datafusion-expr/recursive_protection", "datafusion-optimizer/recursive_protection", "datafusion-physical-optimizer/recursive_protection", + "datafusion-physical-expr/recursive_protection", "datafusion-sql/recursive_protection", "sqlparser/recursive-protection", ] @@ -115,7 +115,7 @@ arrow = { workspace = true } arrow-schema = { workspace = true, features = ["canonical_extension_types"] } async-trait = { workspace = true } bytes = { workspace = true } -bzip2 = { version = "0.6.1", optional = true } +bzip2 = { workspace = true, optional = true } chrono = { workspace = true } datafusion-catalog = { workspace = true } datafusion-catalog-listing = { workspace = true } @@ -143,24 +143,23 @@ datafusion-physical-optimizer = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-session = { workspace = true } datafusion-sql = { workspace = true, optional = true } -flate2 = { version = "1.1.4", optional = true } +flate2 = { workspace = true, optional = true } futures = { workspace = true } itertools = { workspace = true } +liblzma = { workspace = true, optional = true } log = { workspace = true } object_store = { workspace = true } parking_lot = { workspace = true } parquet = { workspace = true, optional = true, default-features = true } rand = { workspace = true } regex = { workspace = true } -rstest = { workspace = true } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } sqlparser = { workspace = true, optional = true } tempfile = { workspace = true } tokio = { workspace = true } url = { workspace = true } -uuid = { version = "1.18", features = ["v4", "js"] } -xz2 = { version = "0.1", optional = true, features = ["static"] } -zstd = { version = "0.13", optional = true, default-features = false } +uuid = { version = "1.19", features = ["v4", "js"] } +zstd = { workspace = true, optional = true } [dev-dependencies] async-trait = { workspace = true } @@ -173,9 +172,9 @@ datafusion-macros = { workspace = true } datafusion-physical-optimizer = { workspace = true } doc-comment = { workspace = true } env_logger = { workspace = true } -glob = { version = "0.3.0" } +glob = { workspace = true } insta = { workspace = true } -paste = "^1.0" +paste = { workspace = true } rand = { workspace = true, features = ["small_rng"] } rand_distr = "0.5" regex = { workspace = true } @@ -240,6 +239,10 @@ harness = false name = "parquet_query_sql" required-features = ["parquet"] +[[bench]] +harness = false +name = "range_and_generate_series" + [[bench]] harness = false name = "sql_planner" @@ -272,3 +275,8 @@ name = "dataframe" [[bench]] harness = false name = "spm" + +[[bench]] +harness = false +name = "preserve_file_partitioning" +required-features = ["parquet"] diff --git a/datafusion/core/benches/aggregate_query_sql.rs 
b/datafusion/core/benches/aggregate_query_sql.rs index 87aeed49337eb..4aa667504e459 100644 --- a/datafusion/core/benches/aggregate_query_sql.rs +++ b/datafusion/core/benches/aggregate_query_sql.rs @@ -31,6 +31,7 @@ use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; +#[expect(clippy::needless_pass_by_value)] fn query(ctx: Arc>, rt: &Runtime, sql: &str) { let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); black_box(rt.block_on(df.collect()).unwrap()); diff --git a/datafusion/core/benches/csv_load.rs b/datafusion/core/benches/csv_load.rs index de0f0d8250572..228457947fd5a 100644 --- a/datafusion/core/benches/csv_load.rs +++ b/datafusion/core/benches/csv_load.rs @@ -34,6 +34,7 @@ use std::time::Duration; use test_utils::AccessLogGenerator; use tokio::runtime::Runtime; +#[expect(clippy::needless_pass_by_value)] fn load_csv( ctx: Arc>, rt: &Runtime, diff --git a/datafusion/core/benches/data_utils/mod.rs b/datafusion/core/benches/data_utils/mod.rs index fffe2e2d17522..630bc056600b4 100644 --- a/datafusion/core/benches/data_utils/mod.rs +++ b/datafusion/core/benches/data_utils/mod.rs @@ -18,9 +18,9 @@ //! This module provides the in-memory table for more realistic benchmarking. use arrow::array::{ - builder::{Int64Builder, StringBuilder}, ArrayRef, Float32Array, Float64Array, RecordBatch, StringArray, StringViewBuilder, UInt64Array, + builder::{Int64Builder, StringBuilder}, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::datasource::MemTable; @@ -139,6 +139,7 @@ fn create_record_batch( /// Create record batches of `partitions_len` partitions and `batch_size` for each batch, /// with a total number of `array_len` records +#[expect(clippy::needless_pass_by_value)] pub fn create_record_batches( schema: SchemaRef, array_len: usize, diff --git a/datafusion/core/benches/dataframe.rs b/datafusion/core/benches/dataframe.rs index 00fa85918347a..726187ab5e922 100644 --- a/datafusion/core/benches/dataframe.rs +++ b/datafusion/core/benches/dataframe.rs @@ -45,6 +45,7 @@ fn create_context(field_count: u32) -> datafusion_common::Result, rt: &Runtime) { black_box(rt.block_on(async { let mut data_frame = ctx.table("t").await.unwrap(); diff --git a/datafusion/core/benches/distinct_query_sql.rs b/datafusion/core/benches/distinct_query_sql.rs index d05e8b13b2af3..0e638e293d8cf 100644 --- a/datafusion/core/benches/distinct_query_sql.rs +++ b/datafusion/core/benches/distinct_query_sql.rs @@ -24,16 +24,17 @@ mod data_utils; use crate::criterion::Criterion; use data_utils::{create_table_provider, make_data}; use datafusion::execution::context::SessionContext; -use datafusion::physical_plan::{collect, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect}; use datafusion::{datasource::MemTable, error::Result}; -use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; +use datafusion_execution::config::SessionConfig; use parking_lot::Mutex; use std::hint::black_box; use std::{sync::Arc, time::Duration}; use tokio::runtime::Runtime; +#[expect(clippy::needless_pass_by_value)] fn query(ctx: Arc>, rt: &Runtime, sql: &str) { let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); black_box(rt.block_on(df.collect()).unwrap()); @@ -124,6 +125,7 @@ async fn distinct_with_limit( Ok(()) } +#[expect(clippy::needless_pass_by_value)] fn run(rt: &Runtime, plan: Arc, ctx: Arc) { black_box(rt.block_on(distinct_with_limit(plan.clone(), ctx.clone()))).unwrap(); } diff --git a/datafusion/core/benches/filter_query_sql.rs 
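The `#[expect(clippy::needless_pass_by_value)]` annotations added across these benches all sit on the same idiom: a synchronous Criterion body driving async DataFusion calls on an explicit Tokio runtime. A pared-down version of that pattern (bench name and query are placeholders; the real helpers take `Arc<Mutex<SessionContext>>`, which is what triggers the lint):

```rust
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion::prelude::SessionContext;
use std::hint::black_box;
use tokio::runtime::Runtime;

fn run_query(ctx: &SessionContext, rt: &Runtime, sql: &str) {
    // Criterion iteration bodies are synchronous, so each async step is
    // driven to completion with `block_on`.
    let df = rt.block_on(ctx.sql(sql)).unwrap();
    black_box(rt.block_on(df.collect()).unwrap());
}

fn bench(c: &mut Criterion) {
    let ctx = SessionContext::new();
    let rt = Runtime::new().unwrap();
    c.bench_function("select_one", |b| {
        b.iter(|| run_query(&ctx, &rt, "SELECT 1"))
    });
}

criterion_group!(benches, bench);
criterion_main!(benches);
```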
b/datafusion/core/benches/filter_query_sql.rs index 16905e0f96605..3b80518d32dcd 100644 --- a/datafusion/core/benches/filter_query_sql.rs +++ b/datafusion/core/benches/filter_query_sql.rs @@ -20,7 +20,7 @@ use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use futures::executor::block_on; diff --git a/datafusion/core/benches/map_query_sql.rs b/datafusion/core/benches/map_query_sql.rs index 09234546b2dfe..67904197bc257 100644 --- a/datafusion/core/benches/map_query_sql.rs +++ b/datafusion/core/benches/map_query_sql.rs @@ -15,14 +15,15 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashSet; use std::hint::black_box; use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, RecordBatch}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use parking_lot::Mutex; -use rand::prelude::ThreadRng; use rand::Rng; +use rand::prelude::ThreadRng; use tokio::runtime::Runtime; use datafusion::prelude::SessionContext; @@ -33,11 +34,12 @@ use datafusion_functions_nested::map::map; mod data_utils; fn build_keys(rng: &mut ThreadRng) -> Vec { - let mut keys = vec![]; - for _ in 0..1000 { - keys.push(rng.random_range(0..9999).to_string()); + let mut keys = HashSet::with_capacity(1000); + while keys.len() < 1000 { + let key = rng.random_range(0..9999).to_string(); + keys.insert(key); } - keys + keys.into_iter().collect() } fn build_values(rng: &mut ThreadRng) -> Vec { diff --git a/datafusion/core/benches/math_query_sql.rs b/datafusion/core/benches/math_query_sql.rs index 76824850c114c..4d1d4abb6783c 100644 --- a/datafusion/core/benches/math_query_sql.rs +++ b/datafusion/core/benches/math_query_sql.rs @@ -36,6 +36,7 @@ use datafusion::datasource::MemTable; use datafusion::error::Result; use datafusion::execution::context::SessionContext; +#[expect(clippy::needless_pass_by_value)] fn query(ctx: Arc>, rt: &Runtime, sql: &str) { // execute the query let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); diff --git a/datafusion/core/benches/parquet_query_sql.rs b/datafusion/core/benches/parquet_query_sql.rs index e2b3810480130..e44524127bf18 100644 --- a/datafusion/core/benches/parquet_query_sql.rs +++ b/datafusion/core/benches/parquet_query_sql.rs @@ -23,14 +23,14 @@ use arrow::datatypes::{ SchemaRef, }; use arrow::record_batch::RecordBatch; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::instant::Instant; use futures::stream::StreamExt; use parquet::arrow::ArrowWriter; use parquet::file::properties::{WriterProperties, WriterVersion}; -use rand::distr::uniform::SampleUniform; use rand::distr::Alphanumeric; +use rand::distr::uniform::SampleUniform; use rand::prelude::*; use rand::rng; use std::fs::File; diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index e4838572f60fb..e6763b4761c2a 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -32,7 +32,7 @@ use tokio::runtime::Runtime; use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use 
datafusion::physical_plan::{ collect, - expressions::{col, PhysicalSortExpr}, + expressions::{PhysicalSortExpr, col}, }; use datafusion::prelude::SessionContext; use datafusion_datasource::memory::MemorySourceConfig; @@ -40,6 +40,7 @@ use datafusion_physical_expr_common::sort_expr::LexOrdering; // Initialize the operator using the provided record batches and the sort key // as inputs. All record batches must have the same schema. +#[expect(clippy::needless_pass_by_value)] fn sort_preserving_merge_operator( session_ctx: Arc, rt: &Runtime, diff --git a/datafusion/core/benches/preserve_file_partitioning.rs b/datafusion/core/benches/preserve_file_partitioning.rs new file mode 100644 index 0000000000000..17ebca52cd1d2 --- /dev/null +++ b/datafusion/core/benches/preserve_file_partitioning.rs @@ -0,0 +1,838 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Benchmark for the `preserve_file_partitions` optimization. +//! +//! When enabled, this optimization declares Hive-partitioned tables as +//! `Hash([partition_col])` partitioned, allowing the query optimizer to +//! skip unnecessary repartitioning and sorting operations. +//! +//! When This Optimization Helps +//! - Window functions: PARTITION BY on partition column eliminates RepartitionExec and SortExec +//! - Aggregates with ORDER BY: GROUP BY partition column and ORDER BY eliminates the post-aggregate sort +//! +//! When This Optimization Does NOT Help +//! - GROUP BY non-partition columns: Required Hash distribution doesn't match declared partitioning +//! - When the number of distinct file partitioning groups < the number of CPUs available: Reduces +//! parallelism, which may outweigh the benefit of fewer shuffles +//! +//! Usage +//! - BENCH_SIZE=small|medium|large cargo bench -p datafusion --bench preserve_file_partitioning +//! - SAVE_PLANS=1 cargo bench ...
# Save query plans to files + +use arrow::array::{ArrayRef, Float64Array, StringArray, TimestampMillisecondArray}; +use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +use arrow::record_batch::RecordBatch; +use arrow::util::pretty::pretty_format_batches; +use criterion::{Criterion, criterion_group, criterion_main}; +use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext, col}; +use datafusion_expr::SortExpr; +use parquet::arrow::ArrowWriter; +use parquet::file::properties::WriterProperties; +use std::fs::{self, File}; +use std::io::Write; +use std::path::Path; +use std::sync::Arc; +use tempfile::TempDir; +use tokio::runtime::Runtime; + +#[derive(Debug, Clone, Copy)] +struct BenchConfig { + fact_partitions: usize, + rows_per_partition: usize, + target_partitions: usize, + measurement_time_secs: u64, +} + +impl BenchConfig { + fn small() -> Self { + Self { + fact_partitions: 10, + rows_per_partition: 1_000_000, + target_partitions: 10, + measurement_time_secs: 15, + } + } + + fn medium() -> Self { + Self { + fact_partitions: 30, + rows_per_partition: 3_000_000, + target_partitions: 30, + measurement_time_secs: 30, + } + } + + fn large() -> Self { + Self { + fact_partitions: 50, + rows_per_partition: 6_000_000, + target_partitions: 50, + measurement_time_secs: 90, + } + } + + fn from_env() -> Self { + match std::env::var("BENCH_SIZE").as_deref() { + Ok("small") | Ok("SMALL") => Self::small(), + Ok("medium") | Ok("MEDIUM") => Self::medium(), + Ok("large") | Ok("LARGE") => Self::large(), + _ => { + println!("Using SMALL dataset (set BENCH_SIZE=small|medium|large)"); + Self::small() + } + } + } + + fn total_rows(&self) -> usize { + self.fact_partitions * self.rows_per_partition + } + + fn high_cardinality(base: &Self) -> Self { + Self { + fact_partitions: (base.fact_partitions as f64 * 2.5) as usize, + rows_per_partition: base.rows_per_partition / 2, + target_partitions: base.target_partitions, + measurement_time_secs: base.measurement_time_secs, + } + } +} + +fn dkey_names(count: usize) -> Vec { + (0..count) + .map(|i| { + if i < 26 { + ((b'A' + i as u8) as char).to_string() + } else { + format!( + "{}{}", + (b'A' + ((i / 26) - 1) as u8) as char, + (b'A' + (i % 26) as u8) as char + ) + } + }) + .collect() +} + +/// Hive-partitioned fact table, sorted by timestamp within each partition. 
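Before the data generators (the `generate_fact_table` described by the doc line above continues right after this aside), it may help to see what the benchmark toggles from a user's point of view: a Hive-partitioned Parquet table registered with its partition column and file sort order while `preserve_file_partitions` is enabled. The config key is the one this benchmark sets; paths, partition counts and the query are placeholders:

```rust
use arrow::datatypes::DataType;
use datafusion::error::Result;
use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext, col};

#[tokio::main]
async fn main() -> Result<()> {
    // Enable the optimization and register a Hive-partitioned table whose
    // directory layout is fact/f_dkey=<value>/data.parquet.
    let config = SessionConfig::new()
        .with_target_partitions(8)
        .set_usize("datafusion.optimizer.preserve_file_partitions", 1);
    let ctx = SessionContext::new_with_config(config);

    let options = ParquetReadOptions {
        // The partition column taken from the directory names, plus the
        // per-file sort order, let the source report Hash([f_dkey]).
        table_partition_cols: vec![("f_dkey".to_string(), DataType::Utf8)],
        file_sort_order: vec![vec![col("f_dkey").sort(true, false)]],
        ..Default::default()
    };
    ctx.register_parquet("fact", "/path/to/fact", options).await?;

    ctx.sql("SELECT f_dkey, COUNT(*) FROM fact GROUP BY f_dkey ORDER BY f_dkey")
        .await?
        .show()
        .await?;
    Ok(())
}
```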
+fn generate_fact_table( + base_dir: &Path, + num_partitions: usize, + rows_per_partition: usize, +) { + let fact_dir = base_dir.join("fact"); + + let schema = Arc::new(Schema::new(vec![ + Field::new( + "timestamp", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new("value", DataType::Float64, false), + ])); + + let props = WriterProperties::builder() + .set_compression(parquet::basic::Compression::SNAPPY) + .build(); + + let dkeys = dkey_names(num_partitions); + + for dkey in &dkeys { + let part_dir = fact_dir.join(format!("f_dkey={dkey}")); + fs::create_dir_all(&part_dir).unwrap(); + let file_path = part_dir.join("data.parquet"); + let file = File::create(file_path).unwrap(); + + let mut writer = + ArrowWriter::try_new(file, schema.clone(), Some(props.clone())).unwrap(); + + let base_ts = 1672567200000i64; // 2023-01-01T09:00:00 + let timestamps: Vec = (0..rows_per_partition) + .map(|i| base_ts + (i as i64 * 10000)) + .collect(); + + let values: Vec = (0..rows_per_partition) + .map(|i| 50.0 + (i % 100) as f64 + ((i % 7) as f64 * 10.0)) + .collect(); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(TimestampMillisecondArray::from(timestamps)) as ArrayRef, + Arc::new(Float64Array::from(values)), + ], + ) + .unwrap(); + + writer.write(&batch).unwrap(); + writer.close().unwrap(); + } +} + +/// Single-file dimension table for CollectLeft joins. +fn generate_dimension_table(base_dir: &Path, num_partitions: usize) { + let dim_dir = base_dir.join("dimension"); + fs::create_dir_all(&dim_dir).unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new("d_dkey", DataType::Utf8, false), + Field::new("env", DataType::Utf8, false), + Field::new("service", DataType::Utf8, false), + Field::new("host", DataType::Utf8, false), + ])); + + let props = WriterProperties::builder() + .set_compression(parquet::basic::Compression::SNAPPY) + .build(); + + let file_path = dim_dir.join("data.parquet"); + let file = File::create(file_path).unwrap(); + let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props)).unwrap(); + + let dkeys = dkey_names(num_partitions); + let envs = ["dev", "prod", "staging", "test"]; + let services = ["log", "trace", "metric"]; + let hosts = ["ma", "vim", "nano", "emacs"]; + + let d_dkey_vals: Vec = dkeys.clone(); + let env_vals: Vec = dkeys + .iter() + .enumerate() + .map(|(i, _)| envs[i % envs.len()].to_string()) + .collect(); + let service_vals: Vec = dkeys + .iter() + .enumerate() + .map(|(i, _)| services[i % services.len()].to_string()) + .collect(); + let host_vals: Vec = dkeys + .iter() + .enumerate() + .map(|(i, _)| hosts[i % hosts.len()].to_string()) + .collect(); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(d_dkey_vals)) as ArrayRef, + Arc::new(StringArray::from(env_vals)), + Arc::new(StringArray::from(service_vals)), + Arc::new(StringArray::from(host_vals)), + ], + ) + .unwrap(); + + writer.write(&batch).unwrap(); + writer.close().unwrap(); +} + +struct BenchVariant { + name: &'static str, + preserve_file_partitions: usize, + prefer_existing_sort: bool, +} + +const BENCH_VARIANTS: [BenchVariant; 3] = [ + BenchVariant { + name: "with_optimization", + preserve_file_partitions: 1, + prefer_existing_sort: false, + }, + BenchVariant { + name: "prefer_existing_sort", + preserve_file_partitions: 0, + prefer_existing_sort: true, + }, + BenchVariant { + name: "without_optimization", + preserve_file_partitions: 0, + prefer_existing_sort: false, + }, +]; + +async fn 
save_plans( + output_file: &Path, + fact_path: &str, + dim_path: Option<&str>, + target_partitions: usize, + query: &str, + file_sort_order: Option>>, +) { + let mut file = File::create(output_file).unwrap(); + writeln!(file, "Query: {query}\n").unwrap(); + + for variant in &BENCH_VARIANTS { + let session_config = SessionConfig::new() + .with_target_partitions(target_partitions) + .set_usize( + "datafusion.optimizer.preserve_file_partitions", + variant.preserve_file_partitions, + ) + .set_bool( + "datafusion.optimizer.prefer_existing_sort", + variant.prefer_existing_sort, + ); + let ctx = SessionContext::new_with_config(session_config); + + let mut fact_options = ParquetReadOptions { + table_partition_cols: vec![("f_dkey".to_string(), DataType::Utf8)], + ..Default::default() + }; + if let Some(ref order) = file_sort_order { + fact_options.file_sort_order = order.clone(); + } + ctx.register_parquet("fact", fact_path, fact_options) + .await + .unwrap(); + + if let Some(dim) = dim_path { + let dim_schema = Arc::new(Schema::new(vec![ + Field::new("d_dkey", DataType::Utf8, false), + Field::new("env", DataType::Utf8, false), + Field::new("service", DataType::Utf8, false), + Field::new("host", DataType::Utf8, false), + ])); + let dim_options = ParquetReadOptions { + schema: Some(&dim_schema), + ..Default::default() + }; + ctx.register_parquet("dimension", dim, dim_options) + .await + .unwrap(); + } + + let df = ctx.sql(query).await.unwrap(); + let plan = df.explain(false, false).unwrap().collect().await.unwrap(); + writeln!(file, "=== {} ===", variant.name).unwrap(); + writeln!(file, "{}\n", pretty_format_batches(&plan).unwrap()).unwrap(); + } +} + +#[allow(clippy::too_many_arguments)] +fn run_benchmark( + c: &mut Criterion, + rt: &Runtime, + name: &str, + fact_path: &str, + dim_path: Option<&str>, + target_partitions: usize, + query: &str, + file_sort_order: &Option>>, +) { + if std::env::var("SAVE_PLANS").is_ok() { + let output_path = format!("{name}_plans.txt"); + rt.block_on(save_plans( + Path::new(&output_path), + fact_path, + dim_path, + target_partitions, + query, + file_sort_order.clone(), + )); + println!("Plans saved to {output_path}"); + } + + let mut group = c.benchmark_group(name); + + for variant in &BENCH_VARIANTS { + let fact_path_owned = fact_path.to_string(); + let dim_path_owned = dim_path.map(|s| s.to_string()); + let sort_order = file_sort_order.clone(); + let query_owned = query.to_string(); + let preserve_file_partitions = variant.preserve_file_partitions; + let prefer_existing_sort = variant.prefer_existing_sort; + + group.bench_function(variant.name, |b| { + b.to_async(rt).iter(|| { + let fact_path = fact_path_owned.clone(); + let dim_path = dim_path_owned.clone(); + let sort_order = sort_order.clone(); + let query = query_owned.clone(); + async move { + let session_config = SessionConfig::new() + .with_target_partitions(target_partitions) + .set_usize( + "datafusion.optimizer.preserve_file_partitions", + preserve_file_partitions, + ) + .set_bool( + "datafusion.optimizer.prefer_existing_sort", + prefer_existing_sort, + ); + let ctx = SessionContext::new_with_config(session_config); + + let mut fact_options = ParquetReadOptions { + table_partition_cols: vec![( + "f_dkey".to_string(), + DataType::Utf8, + )], + ..Default::default() + }; + if let Some(ref order) = sort_order { + fact_options.file_sort_order = order.clone(); + } + ctx.register_parquet("fact", &fact_path, fact_options) + .await + .unwrap(); + + if let Some(ref dim) = dim_path { + let dim_schema = 
Arc::new(Schema::new(vec![ + Field::new("d_dkey", DataType::Utf8, false), + Field::new("env", DataType::Utf8, false), + Field::new("service", DataType::Utf8, false), + Field::new("host", DataType::Utf8, false), + ])); + let dim_options = ParquetReadOptions { + schema: Some(&dim_schema), + ..Default::default() + }; + ctx.register_parquet("dimension", dim, dim_options) + .await + .unwrap(); + } + + let df = ctx.sql(&query).await.unwrap(); + df.collect().await.unwrap() + } + }) + }); + } + + group.finish(); +} + +/// Aggregate on high-cardinality partitions which eliminates repartition and sort. +/// +/// Query: SELECT f_dkey, COUNT(*), SUM(value) FROM fact GROUP BY f_dkey ORDER BY f_dkey +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ with_optimization │ +/// │ (preserve_file_partitions=enabled) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ SortPreservingMergeExec │ Sort Preserved │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ No repartitioning needed │ +/// │ │ (SinglePartitioned) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ DataSourceExec │ partitioning=Hash([f_dkey]) │ +/// │ │ file_groups={N groups} │ │ +/// │ └───────────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ prefer_existing_sort │ +/// │ (preserve_file_partitions=disabled, prefer_existing_sort=true) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ SortPreservingMergeExec │ Sort Preserved │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (FinalPartitioned) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ RepartitionExec │ Hash shuffle with order preservation │ +/// │ │ Hash([f_dkey], N) │ Uses k-way merge to maintain sort, has overhead │ +/// │ │ preserve_order=true │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (Partial) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ DataSourceExec │ partitioning=UnknownPartitioning │ +/// │ └───────────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ without_optimization │ +/// │ (preserve_file_partitions=disabled, prefer_existing_sort=false) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ SortPreservingMergeExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ FinalPartitioned │ +/// │ │ (FinalPartitioned) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ SortExec │ Must sort after shuffle │ +/// │ │ [f_dkey ASC] │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ RepartitionExec │ Hash shuffle destroys ordering │ +/// │ │ Hash([f_dkey], N) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ 
┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (Partial) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ DataSourceExec │ partitioning=UnknownPartitioning │ +/// │ └───────────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +fn preserve_order_bench( + c: &mut Criterion, + rt: &Runtime, + hc_fact_path: &str, + target_partitions: usize, +) { + let query = "SELECT f_dkey, COUNT(*) as cnt, SUM(value) as total \ + FROM fact \ + GROUP BY f_dkey \ + ORDER BY f_dkey"; + + let file_sort_order = vec![vec![col("f_dkey").sort(true, false)]]; + + run_benchmark( + c, + rt, + "preserve_order", + hc_fact_path, + None, + target_partitions, + query, + &Some(file_sort_order), + ); +} + +/// Join and aggregate on partition column which demonstrates propagation through join. +/// +/// Query: SELECT f.f_dkey, MAX(d.env), ... FROM fact f JOIN dimension d ON f.f_dkey = d.d_dkey +/// WHERE d.service = 'log' GROUP BY f.f_dkey ORDER BY f.f_dkey +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ with_optimization │ +/// │ (preserve_file_partitions=enabled) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ SortPreservingMergeExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ Hash partitioning propagates through join │ +/// │ │ (SinglePartitioned) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ HashJoinExec │ Hash partitioning preserved on probe side │ +/// │ │ (CollectLeft) │ │ +/// │ └──────────┬────────────────┘ │ +/// │ │ │ +/// │ ┌──────┴──────┐ │ +/// │ │ │ │ +/// │ ┌───▼───┐ ┌────▼────────────────┐ │ +/// │ │ Dim │ │ DataSourceExec │ partitioning=Hash([f_dkey]), output_ordering=[f_dkey] │ +/// │ │ Table │ │ (fact, N groups) │ │ +/// │ └───────┘ └─────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ prefer_existing_sort │ +/// │ (preserve_file_partitions=disabled, prefer_existing_sort=true) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ SortPreservingMergeExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (FinalPartitioned) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ RepartitionExec │ Hash shuffle with order preservation │ +/// │ │ preserve_order=true │ Uses k-way merge to maintain sort, has overhead │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (Partial) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ HashJoinExec │ │ +/// │ │ (CollectLeft) │ │ +/// │ └──────────┬────────────────┘ │ +/// │ │ │ +/// │ ┌──────┴──────┐ │ +/// │ │ │ │ +/// │ ┌───▼───┐ ┌────▼────────────────┐ │ +/// │ │ Dim │ │ DataSourceExec │ partitioning=UnknownPartitioning, output_ordering=[f_dkey] │ +/// │ │ Table │ │ (fact) │ │ +/// │ └───────┘ └─────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +/// +/// 
┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ without_optimization │ +/// │ (preserve_file_partitions=disabled, prefer_existing_sort=false) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ SortPreservingMergeExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (FinalPartitioned) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ SortExec │ Must sort after shuffle │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ RepartitionExec │ Hash shuffle destroys ordering │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ AggregateExec │ │ +/// │ │ (Partial) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ HashJoinExec │ │ +/// │ │ (CollectLeft) │ │ +/// │ └──────────┬────────────────┘ │ +/// │ │ │ +/// │ ┌──────┴──────┐ │ +/// │ │ │ │ +/// │ ┌───▼───┐ ┌────▼────────────────┐ │ +/// │ │ Dim │ │ DataSourceExec │ partitioning=UnknownPartitioning, output_ordering=[f_dkey] │ +/// │ │ Table │ │ (fact) │ │ +/// │ └───────┘ └─────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +fn preserve_order_join_bench( + c: &mut Criterion, + rt: &Runtime, + hc_fact_path: &str, + dim_path: &str, + target_partitions: usize, +) { + let query = "SELECT f.f_dkey, MAX(d.env), MAX(d.service), COUNT(*), SUM(f.value) \ + FROM fact f \ + INNER JOIN dimension d ON f.f_dkey = d.d_dkey \ + WHERE d.service = 'log' \ + GROUP BY f.f_dkey \ + ORDER BY f.f_dkey"; + + let file_sort_order = vec![vec![col("f_dkey").sort(true, false)]]; + + run_benchmark( + c, + rt, + "preserve_order_join", + hc_fact_path, + Some(dim_path), + target_partitions, + query, + &Some(file_sort_order), + ); +} + +/// Window function with LIMIT which demonstrates partition and sort elimination. 
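The plan diagrams in these doc comments (including the window-function case whose description continues below) can be reproduced for any query by collecting `EXPLAIN` output, which is what `save_plans` does under `SAVE_PLANS=1`. A minimal standalone version with a throwaway table:

```rust
use arrow::util::pretty::pretty_format_batches;
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.sql("CREATE TABLE t AS VALUES (1), (2), (3)")
        .await?
        .collect()
        .await?;

    // explain(verbose, analyze): the collected batches contain the logical
    // and physical plans, so scanning them for RepartitionExec or SortExec
    // shows whether a shuffle or sort was actually planned.
    let plan = ctx
        .sql("SELECT column1, COUNT(*) FROM t GROUP BY column1")
        .await?
        .explain(false, false)?
        .collect()
        .await?;
    println!("{}", pretty_format_batches(&plan)?);
    Ok(())
}
```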
+/// +/// Query: SELECT f_dkey, timestamp, value, +/// ROW_NUMBER() OVER (PARTITION BY f_dkey ORDER BY timestamp) as rn +/// FROM fact LIMIT 1000 +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ with_optimization │ +/// │ (preserve_file_partitions=enabled) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ GlobalLimitExec │ │ +/// │ │ (LIMIT 1000) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ BoundedWindowAggExec │ No repaartition needed │ +/// │ │ PARTITION BY f_dkey │ │ +/// │ │ ORDER BY timestamp │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ DataSourceExec │ partitioning=Hash([f_dkey]), output_ordering=[f_dkey, timestamp] │ +/// │ │ file_groups={N groups} │ │ +/// │ └───────────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ prefer_existing_sort │ +/// │ (preserve_file_partitions=disabled, prefer_existing_sort=true) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ GlobalLimitExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ BoundedWindowAggExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ RepartitionExec │ Hash shuffle with order preservation │ +/// │ │ Hash([f_dkey], N) │ Uses k-way merge to maintain sort, has overhead │ +/// │ │ preserve_order=true │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ DataSourceExec │ partitioning=UnknownPartitioning, output_ordering=[f_dkey, timestamp] │ +/// │ └───────────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +/// +/// ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +/// │ without_optimization │ +/// │ (preserve_file_partitions=disabled, prefer_existing_sort=false) │ +/// │ │ +/// │ ┌───────────────────────────┐ │ +/// │ │ GlobalLimitExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ BoundedWindowAggExec │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ SortExec │ Must sort after shuffle │ +/// │ │ [f_dkey, timestamp] │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ RepartitionExec │ Hash shuffle destroys ordering │ +/// │ │ Hash([f_dkey], N) │ │ +/// │ └─────────────┬─────────────┘ │ +/// │ │ │ +/// │ ┌─────────────▼─────────────┐ │ +/// │ │ DataSourceExec │ partitioning=UnknownPartitioning, output_ordering=[f_dkey, timestamp] │ +/// │ └───────────────────────────┘ │ +/// └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +fn preserve_order_window_bench( + c: &mut Criterion, + rt: &Runtime, + fact_path: &str, + target_partitions: usize, +) { + let query = "SELECT f_dkey, timestamp, value, \ + ROW_NUMBER() OVER (PARTITION BY f_dkey ORDER BY timestamp) as rn \ + FROM fact \ + LIMIT 1000"; + + let file_sort_order = vec![vec![ + col("f_dkey").sort(true, false), + col("timestamp").sort(true, false), + ]]; + + 
run_benchmark( + c, + rt, + "preserve_order_window", + fact_path, + None, + target_partitions, + query, + &Some(file_sort_order), + ); +} + +fn benchmark_main(c: &mut Criterion) { + let config = BenchConfig::from_env(); + let hc_config = BenchConfig::high_cardinality(&config); + + println!("\n=== Preserve File Partitioning Benchmark ==="); + println!( + "Normal config: {} partitions × {} rows = {} total rows", + config.fact_partitions, + config.rows_per_partition, + config.total_rows() + ); + println!( + "High-cardinality config: {} partitions × {} rows = {} total rows", + hc_config.fact_partitions, + hc_config.rows_per_partition, + hc_config.total_rows() + ); + println!("Target partitions: {}\n", config.target_partitions); + + let tmp_dir = TempDir::new().unwrap(); + println!("Generating data..."); + + // High-cardinality fact table + generate_fact_table( + tmp_dir.path(), + hc_config.fact_partitions, + hc_config.rows_per_partition, + ); + let hc_fact_dir = tmp_dir.path().join("fact_hc"); + fs::rename(tmp_dir.path().join("fact"), &hc_fact_dir).unwrap(); + let hc_fact_path = hc_fact_dir.to_str().unwrap().to_string(); + + // Normal fact table + generate_fact_table( + tmp_dir.path(), + config.fact_partitions, + config.rows_per_partition, + ); + let fact_path = tmp_dir.path().join("fact").to_str().unwrap().to_string(); + + // Dimension table (for join) + generate_dimension_table(tmp_dir.path(), hc_config.fact_partitions); + let dim_path = tmp_dir + .path() + .join("dimension") + .to_str() + .unwrap() + .to_string(); + + println!("Done.\n"); + + let rt = Runtime::new().unwrap(); + + preserve_order_bench(c, &rt, &hc_fact_path, hc_config.target_partitions); + preserve_order_join_bench( + c, + &rt, + &hc_fact_path, + &dim_path, + hc_config.target_partitions, + ); + preserve_order_window_bench(c, &rt, &fact_path, config.target_partitions); +} + +criterion_group! 
{ + name = benches; + config = { + let config = BenchConfig::from_env(); + Criterion::default() + .measurement_time(std::time::Duration::from_secs(config.measurement_time_secs)) + .sample_size(10) + }; + targets = benchmark_main +} +criterion_main!(benches); diff --git a/datafusion/core/benches/push_down_filter.rs b/datafusion/core/benches/push_down_filter.rs index 139fb12c30947..3c2199c708de6 100644 --- a/datafusion/core/benches/push_down_filter.rs +++ b/datafusion/core/benches/push_down_filter.rs @@ -18,16 +18,16 @@ use arrow::array::RecordBatch; use arrow::datatypes::{DataType, Field, Schema}; use bytes::{BufMut, BytesMut}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::config::ConfigOptions; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_plan::ExecutionPlan; +use object_store::ObjectStore; use object_store::memory::InMemory; use object_store::path::Path; -use object_store::ObjectStore; use parquet::arrow::ArrowWriter; use std::sync::Arc; diff --git a/datafusion/core/benches/range_and_generate_series.rs b/datafusion/core/benches/range_and_generate_series.rs new file mode 100644 index 0000000000000..2b1463a21062a --- /dev/null +++ b/datafusion/core/benches/range_and_generate_series.rs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#[macro_use] +extern crate criterion; +extern crate datafusion; + +mod data_utils; + +use crate::criterion::Criterion; +use datafusion::execution::context::SessionContext; +use parking_lot::Mutex; +use std::hint::black_box; +use std::sync::Arc; +use tokio::runtime::Runtime; + +#[expect(clippy::needless_pass_by_value)] +fn query(ctx: Arc>, rt: &Runtime, sql: &str) { + let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); + black_box(rt.block_on(df.collect()).unwrap()); +} + +fn create_context() -> Arc> { + let ctx = SessionContext::new(); + Arc::new(Mutex::new(ctx)) +} + +fn criterion_benchmark(c: &mut Criterion) { + let ctx = create_context(); + let rt = Runtime::new().unwrap(); + + c.bench_function("range(1000000)", |b| { + b.iter(|| query(ctx.clone(), &rt, "SELECT value from range(1000000)")) + }); + + c.bench_function("generate_series(1000000)", |b| { + b.iter(|| { + query( + ctx.clone(), + &rt, + "SELECT value from generate_series(1000000)", + ) + }) + }); + + c.bench_function("range(0, 1000000, 5)", |b| { + b.iter(|| query(ctx.clone(), &rt, "SELECT value from range(0, 1000000, 5)")) + }); + + c.bench_function("generate_series(0, 1000000, 5)", |b| { + b.iter(|| { + query( + ctx.clone(), + &rt, + "SELECT value from generate_series(0, 1000000, 5)", + ) + }) + }); + + c.bench_function("range(1000000, 0, -5)", |b| { + b.iter(|| query(ctx.clone(), &rt, "SELECT value from range(1000000, 0, -5)")) + }); + + c.bench_function("generate_series(1000000, 0, -5)", |b| { + b.iter(|| { + query( + ctx.clone(), + &rt, + "SELECT value from generate_series(1000000, 0, -5)", + ) + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/core/benches/scalar.rs b/datafusion/core/benches/scalar.rs index 540f7212e96e9..d06ed3f28b743 100644 --- a/datafusion/core/benches/scalar.rs +++ b/datafusion/core/benches/scalar.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
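As a quick note on what the new `range_and_generate_series` benchmark above compares: as far as I'm aware the two table functions differ only in bound handling (`range` excludes the upper bound, `generate_series` includes it), so each benched pair scans essentially the same number of rows. An illustrative check:

```rust
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    let n_range = ctx
        .sql("SELECT value FROM range(0, 1000000, 5)")
        .await?
        .count()
        .await?;
    let n_series = ctx
        .sql("SELECT value FROM generate_series(0, 1000000, 5)")
        .await?
        .count()
        .await?;

    // Expect the counts to differ by at most one row (the upper bound).
    println!("range: {n_range} rows, generate_series: {n_series} rows");
    Ok(())
}
```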
-use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::scalar::ScalarValue; fn criterion_benchmark(c: &mut Criterion) { diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 276151e253f7e..4ba57a1530e81 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -78,18 +78,18 @@ use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::{ execution::context::TaskContext, physical_plan::{ + ExecutionPlan, ExecutionPlanProperties, coalesce_partitions::CoalescePartitionsExec, - sorts::sort_preserving_merge::SortPreservingMergeExec, ExecutionPlan, - ExecutionPlanProperties, + sorts::sort_preserving_merge::SortPreservingMergeExec, }, prelude::SessionContext, }; use datafusion_datasource::memory::MemorySourceConfig; -use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; +use datafusion_physical_expr::{PhysicalSortExpr, expressions::col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; /// Benchmarks for SortPreservingMerge stream -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use futures::StreamExt; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -355,14 +355,14 @@ fn utf8_high_cardinality_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_low, utf8_low, utf8_high) fn utf8_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_high_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_high_cardinality_values()) .collect(); if sorted { @@ -388,14 +388,14 @@ fn utf8_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_view_low, utf8_view_low, utf8_view_high) fn utf8_view_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_high_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_high_cardinality_values()) .collect(); if sorted { @@ -421,15 +421,15 @@ fn utf8_view_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (f64, utf8_low, utf8_low, i64) fn mixed_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .i64_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.i64_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.i64_values()) .collect(); if sorted { @@ -459,15 +459,15 @@ fn mixed_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (f64, utf8_view_low, utf8_view_low, i64) fn mixed_tuple_with_utf8_view_streams(sorted: bool) -> 
PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .i64_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.i64_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.i64_values()) .collect(); if sorted { @@ -497,8 +497,8 @@ fn mixed_tuple_with_utf8_view_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_dict) fn dictionary_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); - let mut values = gen.utf8_low_cardinality_values(); + let mut data_gen = DataGenerator::new(); + let mut values = data_gen.utf8_low_cardinality_values(); if sorted { values.sort_unstable(); } @@ -512,12 +512,12 @@ fn dictionary_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_dict, utf8_dict, utf8_dict) fn dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); - let mut tuples: Vec<_> = gen + let mut data_gen = DataGenerator::new(); + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) .collect(); if sorted { @@ -543,13 +543,13 @@ fn dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_dict, utf8_dict, utf8_dict, i64) fn mixed_dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); - let mut tuples: Vec<_> = gen + let mut data_gen = DataGenerator::new(); + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.i64_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.i64_values()) .collect(); if sorted { diff --git a/datafusion/core/benches/sort_limit_query_sql.rs b/datafusion/core/benches/sort_limit_query_sql.rs index e535a018161f1..c18070fb7725e 100644 --- a/datafusion/core/benches/sort_limit_query_sql.rs +++ b/datafusion/core/benches/sort_limit_query_sql.rs @@ -37,6 +37,7 @@ use datafusion::execution::context::SessionContext; use tokio::runtime::Runtime; +#[expect(clippy::needless_pass_by_value)] fn query(ctx: Arc>, rt: &Runtime, sql: &str) { // execute the query let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); @@ -97,8 +98,7 @@ fn create_context() -> Arc> { ctx_holder.lock().push(Arc::new(Mutex::new(ctx))) }); - let ctx = ctx_holder.lock().first().unwrap().clone(); - ctx + ctx_holder.lock().first().unwrap().clone() } fn criterion_benchmark(c: &mut Criterion) { diff --git a/datafusion/core/benches/spm.rs b/datafusion/core/benches/spm.rs index ecc3f908d4b15..9db1306d2bd19 100644 --- a/datafusion/core/benches/spm.rs +++ b/datafusion/core/benches/spm.rs @@ -20,13 +20,13 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, Int64Array, RecordBatch, StringArray}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::expressions::col; use 
datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::{collect, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, collect}; use criterion::async_executor::FuturesExecutor; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion_datasource::memory::MemorySourceConfig; fn generate_spm_for_round_robin_tie_breaker( diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 6266a7184cf51..7cce7e0bd7db7 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -23,19 +23,23 @@ extern crate datafusion; mod data_utils; use crate::criterion::Criterion; +use arrow::array::PrimitiveArray; use arrow::array::{ArrayRef, RecordBatch}; +use arrow::datatypes::ArrowNativeTypeOp; +use arrow::datatypes::ArrowPrimitiveType; use arrow::datatypes::{DataType, Field, Fields, Schema}; use criterion::Bencher; use datafusion::datasource::MemTable; use datafusion::execution::context::SessionContext; -use datafusion_common::{config::Dialect, ScalarValue}; +use datafusion_common::{ScalarValue, config::Dialect}; use datafusion_expr::col; +use rand_distr::num_traits::NumCast; use std::hint::black_box; use std::path::PathBuf; use std::sync::Arc; +use test_utils::TableDef; use test_utils::tpcds::tpcds_schemas; use test_utils::tpch::tpch_schemas; -use test_utils::TableDef; use tokio::runtime::Runtime; const BENCHMARKS_PATH_1: &str = "../../benchmarks/"; @@ -89,6 +93,7 @@ fn create_context() -> SessionContext { /// Register the table definitions as a MemTable with the context and return the /// context +#[expect(clippy::needless_pass_by_value)] fn register_defs(ctx: SessionContext, defs: Vec) -> SessionContext { defs.iter().for_each(|TableDef { name, schema }| { ctx.register_table( @@ -155,18 +160,30 @@ fn benchmark_with_param_values_many_columns( /// 0,100...9900 /// 0,200...19800 /// 0,300...29700 -fn register_union_order_table(ctx: &SessionContext, num_columns: usize, num_rows: usize) { - // ("c0", [0, 0, ...]) - // ("c1": [100, 200, ...]) - // etc - let iter = (0..num_columns).map(|i| i as u64).map(|i| { - let array: ArrayRef = Arc::new(arrow::array::UInt64Array::from_iter_values( - (0..num_rows) - .map(|j| j as u64 * 100 + i) - .collect::>(), - )); +fn register_union_order_table_generic( + ctx: &SessionContext, + num_columns: usize, + num_rows: usize, +) where + T: ArrowPrimitiveType, + T::Native: ArrowNativeTypeOp + NumCast, +{ + let iter = (0..num_columns).map(|i| { + let array_data: Vec = (0..num_rows) + .map(|j| { + let value = (j as u64) * 100 + (i as u64); + ::from(value).unwrap_or_else(|| { + panic!("Failed to cast numeric value to Native type") + }) + }) + .collect(); + + // Use PrimitiveArray which is generic over the ArrowPrimitiveType T + let array: ArrayRef = Arc::new(PrimitiveArray::::from_iter_values(array_data)); + (format!("c{i}"), array) }); + let batch = RecordBatch::try_from_iter(iter).unwrap(); let schema = batch.schema(); let partitions = vec![vec![batch]]; @@ -183,7 +200,6 @@ fn register_union_order_table(ctx: &SessionContext, num_columns: usize, num_rows ctx.register_table("t", Arc::new(table)).unwrap(); } - /// return a query like /// ```sql /// select c1, 2 as c2, ... 
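The generic type parameters of `register_union_order_table_generic` appear to have been stripped in rendering here (the `fn` name is followed directly by `(`, and the later call sites read `::(`); the intent is a helper that is generic over the Arrow primitive type and is instantiated once for a signed and once for an unsigned type. A compact sketch of that pattern with hypothetical names:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, PrimitiveArray};
use arrow::datatypes::{ArrowPrimitiveType, Int64Type, UInt64Type};
use rand_distr::num_traits::NumCast;

/// Builds one column of `num_rows` values for any Arrow primitive type whose
/// native type can be produced from a u64, mirroring the generic helper above.
fn make_column<T>(num_rows: usize, column_index: u64) -> ArrayRef
where
    T: ArrowPrimitiveType,
    T::Native: NumCast,
{
    let values: Vec<T::Native> = (0..num_rows as u64)
        .map(|j| {
            <T::Native as NumCast>::from(j * 100 + column_index)
                .expect("value fits in the target native type")
        })
        .collect();
    Arc::new(PrimitiveArray::<T>::from_iter_values(values))
}

fn main() {
    // The same helper serves both the signed and unsigned benchmarks.
    let signed = make_column::<Int64Type>(5, 0);
    let unsigned = make_column::<UInt64Type>(5, 1);
    println!("{signed:?}");
    println!("{unsigned:?}");
}
```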
n as cn from t ORDER BY c1 @@ -226,8 +242,10 @@ fn criterion_benchmark(c: &mut Criterion) { if !PathBuf::from(format!("{BENCHMARKS_PATH_1}{CLICKBENCH_DATA_PATH}")).exists() && !PathBuf::from(format!("{BENCHMARKS_PATH_2}{CLICKBENCH_DATA_PATH}")).exists() { - panic!("benchmarks/data/hits_partitioned/ could not be loaded. Please run \ - 'benchmarks/bench.sh data clickbench_partitioned' prior to running this benchmark") + panic!( + "benchmarks/data/hits_partitioned/ could not be loaded. Please run \ + 'benchmarks/bench.sh data clickbench_partitioned' prior to running this benchmark" + ) } let ctx = create_context(); @@ -403,13 +421,40 @@ fn criterion_benchmark(c: &mut Criterion) { // -- Sorted Queries -- // 100, 200 && 300 is taking too long - https://github.com/apache/datafusion/issues/18366 + // Logical plans for the Int64 and UInt64 datatypes differ: the UInt64 plan's Unions are wrapped + // in Projections, so the EliminateNestedUnion OptimizerRule is not applied, leading to significantly + // longer execution time. + // https://github.com/apache/datafusion/issues/17261 + for column_count in [10, 50 /* 100, 200, 300 */] { - register_union_order_table(&ctx, column_count, 1000); + register_union_order_table_generic::( + &ctx, + column_count, + 1000, + ); // this query has many expressions in its sort order so stresses // order equivalence validation c.bench_function( - &format!("physical_sorted_union_order_by_{column_count}"), + &format!("physical_sorted_union_order_by_{column_count}_int64"), + |b| { + // SELECT ... UNION ALL ... + let query = union_orderby_query(column_count); + b.iter(|| physical_plan(&ctx, &rt, &query)) + }, + ); + + let _ = ctx.deregister_table("t"); + } + + for column_count in [10, 50 /* 100, 200, 300 */] { + register_union_order_table_generic::( + &ctx, + column_count, + 1000, + ); + c.bench_function( + &format!("physical_sorted_union_order_by_{column_count}_uint64"), + |b| { + // SELECT ... UNION ALL ...
let query = union_orderby_query(column_count); @@ -477,9 +522,6 @@ fn criterion_benchmark(c: &mut Criterion) { }; let raw_tpcds_sql_queries = (1..100) - // skip query 75 until it is fixed - // https://github.com/apache/datafusion/issues/17801 - .filter(|q| *q != 75) .map(|q| std::fs::read_to_string(format!("{tests_path}tpc-ds/{q}.sql")).unwrap()) .collect::>(); diff --git a/datafusion/core/benches/sql_planner_extended.rs b/datafusion/core/benches/sql_planner_extended.rs index aff7cb4d101d5..adaf3e5911e9b 100644 --- a/datafusion/core/benches/sql_planner_extended.rs +++ b/datafusion/core/benches/sql_planner_extended.rs @@ -18,7 +18,7 @@ use arrow::array::{ArrayRef, RecordBatch}; use arrow_schema::DataType; use arrow_schema::TimeUnit::Nanosecond; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::{DataFrame, SessionContext}; use datafusion_catalog::MemTable; use datafusion_common::ScalarValue; diff --git a/datafusion/core/benches/sql_query_with_io.rs b/datafusion/core/benches/sql_query_with_io.rs index 58797dfed6b67..0c188f7ba1047 100644 --- a/datafusion/core/benches/sql_query_with_io.rs +++ b/datafusion/core/benches/sql_query_with_io.rs @@ -20,7 +20,7 @@ use std::{fmt::Write, sync::Arc, time::Duration}; use arrow::array::{Int64Builder, RecordBatch, UInt64Builder}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use bytes::Bytes; -use criterion::{criterion_group, criterion_main, Criterion, SamplingMode}; +use criterion::{Criterion, SamplingMode, criterion_group, criterion_main}; use datafusion::{ datasource::{ file_format::parquet::ParquetFormat, @@ -31,13 +31,13 @@ use datafusion::{ use datafusion_execution::runtime_env::RuntimeEnv; use itertools::Itertools; use object_store::{ + ObjectStore, memory::InMemory, path::Path, throttle::{ThrottleConfig, ThrottledStore}, - ObjectStore, }; use parquet::arrow::ArrowWriter; -use rand::{rngs::StdRng, Rng, SeedableRng}; +use rand::{Rng, SeedableRng, rngs::StdRng}; use tokio::runtime::Runtime; use url::Url; diff --git a/datafusion/core/benches/struct_query_sql.rs b/datafusion/core/benches/struct_query_sql.rs index 5c7b427310827..96434fc379ea6 100644 --- a/datafusion/core/benches/struct_query_sql.rs +++ b/datafusion/core/benches/struct_query_sql.rs @@ -20,7 +20,7 @@ use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use futures::executor::block_on; diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs index 9a5fb7163be5c..a4ae479de4d27 100644 --- a/datafusion/core/benches/topk_aggregate.rs +++ b/datafusion/core/benches/topk_aggregate.rs @@ -18,13 +18,13 @@ mod data_utils; use arrow::util::pretty::pretty_format_batches; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use data_utils::make_data; -use datafusion::physical_plan::{collect, displayable, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect, displayable}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; -use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; +use datafusion_execution::config::SessionConfig; use 
std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; @@ -46,7 +46,9 @@ async fn create_context( opts.optimizer.enable_topk_aggregation = use_topk; let ctx = SessionContext::new_with_config(cfg); let _ = ctx.register_table("traces", mem_table)?; - let sql = format!("select trace_id, max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};"); + let sql = format!( + "select max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};" + ); let df = ctx.sql(sql.as_str()).await?; let physical_plan = df.create_physical_plan().await?; let actual_phys_plan = displayable(physical_plan.as_ref()).indent(true).to_string(); @@ -58,6 +60,7 @@ async fn create_context( Ok((physical_plan, ctx.task_ctx())) } +#[expect(clippy::needless_pass_by_value)] fn run(rt: &Runtime, plan: Arc, ctx: Arc, asc: bool) { black_box(rt.block_on(async { aggregate(plan.clone(), ctx.clone(), asc).await })) .unwrap(); @@ -75,20 +78,20 @@ async fn aggregate( let actual = format!("{}", pretty_format_batches(&batches)?).to_lowercase(); let expected_asc = r#" -+----------------------------------+--------------------------+ -| trace_id | max(traces.timestamp_ms) | -+----------------------------------+--------------------------+ -| 5868861a23ed31355efc5200eb80fe74 | 16909009999999 | -| 4040e64656804c3d77320d7a0e7eb1f0 | 16909009999998 | -| 02801bbe533190a9f8713d75222f445d | 16909009999997 | -| 9e31b3b5a620de32b68fefa5aeea57f1 | 16909009999996 | -| 2d88a860e9bd1cfaa632d8e7caeaa934 | 16909009999995 | -| a47edcef8364ab6f191dd9103e51c171 | 16909009999994 | -| 36a3fa2ccfbf8e00337f0b1254384db6 | 16909009999993 | -| 0756be84f57369012e10de18b57d8a2f | 16909009999992 | -| d4d6bf9845fa5897710e3a8db81d5907 | 16909009999991 | -| 3c2cc1abe728a66b61e14880b53482a0 | 16909009999990 | -+----------------------------------+--------------------------+ ++--------------------------+ +| max(traces.timestamp_ms) | ++--------------------------+ +| 16909009999999 | +| 16909009999998 | +| 16909009999997 | +| 16909009999996 | +| 16909009999995 | +| 16909009999994 | +| 16909009999993 | +| 16909009999992 | +| 16909009999991 | +| 16909009999990 | ++--------------------------+ "# .trim(); if asc { diff --git a/datafusion/core/benches/window_query_sql.rs b/datafusion/core/benches/window_query_sql.rs index 6d83959f7eb3c..e4643567a0f0c 100644 --- a/datafusion/core/benches/window_query_sql.rs +++ b/datafusion/core/benches/window_query_sql.rs @@ -31,6 +31,7 @@ use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; +#[expect(clippy::needless_pass_by_value)] fn query(ctx: Arc>, rt: &Runtime, sql: &str) { let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); black_box(rt.block_on(df.collect()).unwrap()); diff --git a/datafusion/core/src/bin/print_functions_docs.rs b/datafusion/core/src/bin/print_functions_docs.rs index 63387c023b11a..74a10bf079e61 100644 --- a/datafusion/core/src/bin/print_functions_docs.rs +++ b/datafusion/core/src/bin/print_functions_docs.rs @@ -16,10 +16,10 @@ // under the License. 
use datafusion::execution::SessionStateDefaults; -use datafusion_common::{not_impl_err, HashSet, Result}; +use datafusion_common::{HashSet, Result, not_impl_err}; use datafusion_expr::{ - aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF, - DocSection, Documentation, ScalarUDF, WindowUDF, + AggregateUDF, DocSection, Documentation, ScalarUDF, WindowUDF, + aggregate_doc_sections, scalar_doc_sections, window_doc_sections, }; use itertools::Itertools; use std::env::args; @@ -108,6 +108,7 @@ fn save_doc_code_text(documentation: &Documentation, name: &str) { file.write_all(attr_text.as_bytes()).unwrap(); } +#[expect(clippy::needless_pass_by_value)] fn print_docs( providers: Vec>, doc_sections: Vec, @@ -254,7 +255,9 @@ fn print_docs( for f in &providers_with_no_docs { eprintln!(" - {f}"); } - not_impl_err!("Some functions do not have documentation. Please implement `documentation` for: {providers_with_no_docs:?}") + not_impl_err!( + "Some functions do not have documentation. Please implement `documentation` for: {providers_with_no_docs:?}" + ) } else { Ok(docs) } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 98804e424b407..0d060db3bf147 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -26,19 +26,19 @@ use crate::datasource::file_format::csv::CsvFormatFactory; use crate::datasource::file_format::format_as_file_type; use crate::datasource::file_format::json::JsonFormatFactory; use crate::datasource::{ - provider_as_source, DefaultTableSource, MemTable, TableProvider, + DefaultTableSource, MemTable, TableProvider, provider_as_source, }; use crate::error::Result; -use crate::execution::context::{SessionState, TaskContext}; use crate::execution::FunctionRegistry; +use crate::execution::context::{SessionState, TaskContext}; use crate::logical_expr::utils::find_window_exprs; use crate::logical_expr::{ - col, ident, Expr, JoinType, LogicalPlan, LogicalPlanBuilder, - LogicalPlanBuilderOptions, Partitioning, TableType, + Expr, JoinType, LogicalPlan, LogicalPlanBuilder, LogicalPlanBuilderOptions, + Partitioning, TableType, col, ident, }; use crate::physical_plan::{ - collect, collect_partitioned, execute_stream, execute_stream_partitioned, - ExecutionPlan, SendableRecordBatchStream, + ExecutionPlan, SendableRecordBatchStream, collect, collect_partitioned, + execute_stream, execute_stream_partitioned, }; use crate::prelude::SessionContext; use std::any::Any; @@ -49,20 +49,20 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; use arrow::compute::{cast, concat}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow_schema::FieldRef; use datafusion_common::config::{CsvOptions, JsonOptions}; use datafusion_common::{ - exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError, - TableReference, UnnestOptions, + TableReference, UnnestOptions, exec_err, internal_datafusion_err, not_impl_err, + plan_datafusion_err, plan_err, unqualified_field_not_found, }; use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::{ - case, + ExplainOption, SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE, case, dml::InsertOp, expr::{Alias, ScalarFunction}, is_null, lit, utils::COUNT_STAR_EXPANSION, - ExplainOption, SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE, }; use datafusion_functions::core::coalesce; use 
datafusion_functions_aggregate::expr_fn::{ @@ -310,11 +310,20 @@ impl DataFrame { pub fn select_columns(self, columns: &[&str]) -> Result { let fields = columns .iter() - .flat_map(|name| { - self.plan + .map(|name| { + let fields = self + .plan .schema() - .qualified_fields_with_unqualified_name(name) + .qualified_fields_with_unqualified_name(name); + if fields.is_empty() { + Err(unqualified_field_not_found(name, self.plan.schema())) + } else { + Ok(fields) + } }) + .collect::, _>>()? + .into_iter() + .flatten() .collect::>(); let expr: Vec = fields .into_iter() @@ -1655,7 +1664,7 @@ impl DataFrame { pub fn into_view(self) -> Arc { Arc::new(DataFrameTableProvider { plan: self.plan, - table_type: TableType::Temporary, + table_type: TableType::View, }) } @@ -2232,7 +2241,7 @@ impl DataFrame { .schema() .iter() .map(|(qualifier, field)| { - if qualifier.eq(&qualifier_rename) && field.as_ref() == field_rename { + if qualifier.eq(&qualifier_rename) && field == field_rename { ( col(Column::from((qualifier, field))) .alias_qualified(qualifier.cloned(), new_name), @@ -2321,6 +2330,10 @@ impl DataFrame { /// Cache DataFrame as a memory table. /// + /// Default behavior could be changed using + /// a [`crate::execution::session_state::CacheFactory`] + /// configured via [`SessionState`]. + /// /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; @@ -2335,14 +2348,20 @@ impl DataFrame { /// # } /// ``` pub async fn cache(self) -> Result { - let context = SessionContext::new_with_state((*self.session_state).clone()); - // The schema is consistent with the output - let plan = self.clone().create_physical_plan().await?; - let schema = plan.schema(); - let task_ctx = Arc::new(self.task_ctx()); - let partitions = collect_partitioned(plan, task_ctx).await?; - let mem_table = MemTable::try_new(schema, partitions)?; - context.read_table(Arc::new(mem_table)) + if let Some(cache_factory) = self.session_state.cache_factory() { + let new_plan = + cache_factory.create(self.plan, self.session_state.as_ref())?; + Ok(Self::new(*self.session_state, new_plan)) + } else { + let context = SessionContext::new_with_state((*self.session_state).clone()); + // The schema is consistent with the output + let plan = self.clone().create_physical_plan().await?; + let schema = plan.schema(); + let task_ctx = Arc::new(self.task_ctx()); + let partitions = collect_partitioned(plan, task_ctx).await?; + let mem_table = MemTable::try_new(schema, partitions)?; + context.read_table(Arc::new(mem_table)) + } } /// Apply an alias to the DataFrame. @@ -2383,6 +2402,7 @@ impl DataFrame { /// # Ok(()) /// # } /// ``` + #[expect(clippy::needless_pass_by_value)] pub fn fill_null( &self, value: ScalarValue, @@ -2393,7 +2413,7 @@ impl DataFrame { .schema() .fields() .iter() - .map(|f| f.as_ref().clone()) + .map(Arc::clone) .collect() } else { self.find_columns(&columns)? 
@@ -2430,7 +2450,7 @@ impl DataFrame { } // Helper to find columns from names - fn find_columns(&self, names: &[String]) -> Result> { + fn find_columns(&self, names: &[String]) -> Result> { let schema = self.logical_plan().schema(); names .iter() diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index cb8a6cf29541b..6edf628e2d6d6 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -150,7 +150,7 @@ mod tests { let plan = df.explain(false, false)?.collect().await?; // Filters all the way to Parquet let formatted = pretty::pretty_format_batches(&plan)?.to_string(); - assert!(formatted.contains("FilterExec: id@0 = 1")); + assert!(formatted.contains("FilterExec: id@0 = 1"), "{formatted}"); Ok(()) } diff --git a/datafusion/core/src/datasource/dynamic_file.rs b/datafusion/core/src/datasource/dynamic_file.rs index 256a11ba693b5..50ee96da3dff0 100644 --- a/datafusion/core/src/datasource/dynamic_file.rs +++ b/datafusion/core/src/datasource/dynamic_file.rs @@ -20,9 +20,9 @@ use std::sync::Arc; +use crate::datasource::TableProvider; use crate::datasource::listing::ListingTableConfigExt; use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl}; -use crate::datasource::TableProvider; use crate::error::Result; use crate::execution::context::SessionState; diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs index 77686c5eb7c27..5aeca92b1626d 100644 --- a/datafusion/core/src/datasource/empty.rs +++ b/datafusion/core/src/datasource/empty.rs @@ -28,8 +28,8 @@ use datafusion_common::project_schema; use crate::datasource::{TableProvider, TableType}; use crate::error::Result; use crate::logical_expr::Expr; -use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::empty::EmptyExec; /// An empty plan that is useful for testing and generating plans /// without mapping them to actual data. 
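A minimal usage sketch of the `DataFrame::select_columns` change above: with the added validation, an unknown column name now returns a field-not-found error instead of being silently dropped from the projection. This sketch is not part of the patch; the context `ctx`, table name `t`, and columns `a`/`b` are assumptions for illustration only.

use datafusion::error::Result;
use datafusion::prelude::*;

async fn select_columns_sketch(ctx: &SessionContext) -> Result<()> {
    // assumes a table "t" with columns "a" and "b" is already registered on `ctx`
    let df = ctx.table("t").await?;

    // both names exist, so the projection succeeds as before
    let _projected = df.clone().select_columns(&["a", "b"])?;

    // "c" is not in the schema: with the change above this is now an error
    // instead of the unknown name being silently ignored
    assert!(df.select_columns(&["a", "c"]).is_err());
    Ok(())
}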
diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs index 3428d08a6ae52..cad35d43db486 100644 --- a/datafusion/core/src/datasource/file_format/avro.rs +++ b/datafusion/core/src/datasource/file_format/avro.rs @@ -26,20 +26,21 @@ mod tests { use crate::{ datasource::file_format::test_util::scan_format, prelude::SessionContext, }; - use arrow::array::{as_string_array, Array}; + use arrow::array::{Array, as_string_array}; use datafusion_catalog::Session; use datafusion_common::test_util::batches_to_string; use datafusion_common::{ + Result, cast::{ as_binary_array, as_boolean_array, as_float32_array, as_float64_array, as_int32_array, as_timestamp_microsecond_array, }, - test_util, Result, + test_util, }; use datafusion_datasource_avro::AvroFormat; use datafusion_execution::config::SessionConfig; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use futures::StreamExt; use insta::assert_snapshot; @@ -116,20 +117,20 @@ mod tests { let batches = collect(exec, task_ctx).await?; assert_eq!(batches.len(), 1); - assert_snapshot!(batches_to_string(&batches),@r###" - +----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+ - | id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col | - +----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+ - | 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 | - | 5 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30332f30312f3039 | 31 | 2009-03-01T00:01:00 | - | 6 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30342f30312f3039 | 30 | 2009-04-01T00:00:00 | - | 7 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30342f30312f3039 | 31 | 2009-04-01T00:01:00 | - | 2 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30322f30312f3039 | 30 | 2009-02-01T00:00:00 | - | 3 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30322f30312f3039 | 31 | 2009-02-01T00:01:00 | - | 0 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30312f30312f3039 | 30 | 2009-01-01T00:00:00 | - | 1 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30312f30312f3039 | 31 | 2009-01-01T00:01:00 | - +----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+ - "###); + assert_snapshot!(batches_to_string(&batches),@r" + +----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+ + | id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col | + +----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+ + | 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 | + | 5 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30332f30312f3039 | 31 | 2009-03-01T00:01:00 | + | 6 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30342f30312f3039 | 30 | 2009-04-01T00:00:00 | + | 7 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30342f30312f3039 | 31 | 2009-04-01T00:01:00 | + | 2 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30322f30312f3039 | 30 | 2009-02-01T00:00:00 | + | 3 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30322f30312f3039 | 31 | 
2009-02-01T00:01:00 | + | 0 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30312f30312f3039 | 30 | 2009-01-01T00:00:00 | + | 1 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30312f30312f3039 | 31 | 2009-01-01T00:01:00 | + +----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+ + "); Ok(()) } @@ -245,7 +246,10 @@ mod tests { values.push(array.value(i)); } - assert_eq!("[1235865600000000, 1235865660000000, 1238544000000000, 1238544060000000, 1233446400000000, 1233446460000000, 1230768000000000, 1230768060000000]", format!("{values:?}")); + assert_eq!( + "[1235865600000000, 1235865660000000, 1238544000000000, 1238544060000000, 1233446400000000, 1233446460000000, 1230768000000000, 1230768060000000]", + format!("{values:?}") + ); Ok(()) } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 52fb8ae904ebf..719bc4361ac91 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -32,12 +32,12 @@ mod tests { use crate::prelude::{CsvReadOptions, SessionConfig, SessionContext}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion_catalog::Session; + use datafusion_common::Result; use datafusion_common::cast::as_string_array; use datafusion_common::config::CsvOptions; use datafusion_common::internal_err; use datafusion_common::stats::Precision; use datafusion_common::test_util::{arrow_test_data, batches_to_string}; - use datafusion_common::Result; use datafusion_datasource::decoder::{ BatchDeserializer, DecoderDeserializer, DeserializerOutput, }; @@ -45,7 +45,7 @@ mod tests { use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::write::BatchSerializer; use datafusion_expr::{col, lit}; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use arrow::array::{ Array, BooleanArray, Float64Array, Int32Array, RecordBatch, StringArray, @@ -57,8 +57,8 @@ mod tests { use bytes::Bytes; use chrono::DateTime; use datafusion_common::parsers::CompressionTypeVariant; - use futures::stream::BoxStream; use futures::StreamExt; + use futures::stream::BoxStream; use insta::assert_snapshot; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; @@ -621,15 +621,15 @@ mod tests { .collect() .await?; - assert_snapshot!(batches_to_string(&record_batch), @r###" - +----+------+ - | c2 | c3 | - +----+------+ - | 5 | 36 | - | 5 | -31 | - | 5 | -101 | - +----+------+ - "###); + assert_snapshot!(batches_to_string(&record_batch), @r" + +----+------+ + | c2 | c3 | + +----+------+ + | 5 | 36 | + | 5 | -31 | + | 5 | -101 | + +----+------+ + "); Ok(()) } @@ -706,11 +706,11 @@ mod tests { let re = Regex::new(r"DataSourceExec: file_groups=\{(\d+) group").unwrap(); - if let Some(captures) = re.captures(&plan) { - if let Some(match_) = captures.get(1) { - let n_partitions = match_.as_str().parse::().unwrap(); - return Ok(n_partitions); - } + if let Some(captures) = re.captures(&plan) + && let Some(match_) = captures.get(1) + { + let n_partitions = match_.as_str().parse::().unwrap(); + return Ok(n_partitions); } internal_err!("query contains no DataSourceExec") @@ -736,13 +736,13 @@ mod tests { let query_result = ctx.sql(query).await?.collect().await?; let actual_partitions = count_query_csv_partitions(&ctx, query).await?; - insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&query_result),@r###" + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r" +--------------+ | sum(aggr.c2) | +--------------+ | 285 | +--------------+ - "###); + "); } assert_eq!(n_partitions, actual_partitions); @@ -775,13 +775,13 @@ mod tests { let query_result = ctx.sql(query).await?.collect().await?; let actual_partitions = count_query_csv_partitions(&ctx, query).await?; - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###" + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r" +--------------+ | sum(aggr.c3) | +--------------+ | 781 | +--------------+ - "###); + "); } assert_eq!(1, actual_partitions); // Compressed csv won't be scanned in parallel @@ -812,13 +812,13 @@ mod tests { let query_result = ctx.sql(query).await?.collect().await?; let actual_partitions = count_query_csv_partitions(&ctx, query).await?; - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###" + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r" +--------------+ | sum(aggr.c3) | +--------------+ | 781 | +--------------+ - "###); + "); } assert_eq!(1, actual_partitions); // csv won't be scanned in parallel when newlines_in_values is set @@ -843,10 +843,10 @@ mod tests { let query = "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - assert_snapshot!(batches_to_string(&query_result),@r###" - ++ - ++ - "###); + assert_snapshot!(batches_to_string(&query_result),@r" + ++ + ++ + "); Ok(()) } @@ -868,10 +868,10 @@ mod tests { let query = "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - assert_snapshot!(batches_to_string(&query_result),@r###" - ++ - ++ - "###); + assert_snapshot!(batches_to_string(&query_result),@r" + ++ + ++ + "); Ok(()) } @@ -944,17 +944,19 @@ mod tests { let files: Vec<_> = std::fs::read_dir(&path).unwrap().collect(); assert_eq!(files.len(), 1); - assert!(files - .last() - .unwrap() - .as_ref() - .unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .ends_with(".csv.gz")); + assert!( + files + .last() + .unwrap() + .as_ref() + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .ends_with(".csv.gz") + ); Ok(()) } @@ -983,17 +985,19 @@ mod tests { let files: Vec<_> = std::fs::read_dir(&path).unwrap().collect(); assert_eq!(files.len(), 1); - assert!(files - .last() - .unwrap() - .as_ref() - .unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .ends_with(".csv")); + assert!( + files + .last() + .unwrap() + .as_ref() + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .ends_with(".csv") + ); Ok(()) } @@ -1032,10 +1036,10 @@ mod tests { let query = "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - assert_snapshot!(batches_to_string(&query_result),@r###" - ++ - ++ - "###); + assert_snapshot!(batches_to_string(&query_result),@r" + ++ + ++ + "); Ok(()) } @@ -1084,13 +1088,13 @@ mod tests { let query_result = ctx.sql(query).await?.collect().await?; let actual_partitions = count_query_csv_partitions(&ctx, query).await?; - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###" - +---------------------+ - | sum(empty.column_1) | - +---------------------+ - | 10 | - +---------------------+ - "###);} + insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&query_result),@r" + +---------------------+ + | sum(empty.column_1) | + +---------------------+ + | 10 | + +---------------------+ + ");} assert_eq!(n_partitions, actual_partitions); // Won't get partitioned if all files are empty @@ -1132,13 +1136,13 @@ mod tests { file_size }; - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###" + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r" +-----------------------+ | sum(one_col.column_1) | +-----------------------+ | 50 | +-----------------------+ - "###); + "); } assert_eq!(expected_partitions, actual_partitions); @@ -1171,13 +1175,13 @@ mod tests { let query_result = ctx.sql(query).await?.collect().await?; let actual_partitions = count_query_csv_partitions(&ctx, query).await?; - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###" - +---------------+ - | sum_of_5_cols | - +---------------+ - | 15 | - +---------------+ - "###);} + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r" + +---------------+ + | sum_of_5_cols | + +---------------+ + | 15 | + +---------------+ + ");} assert_eq!(n_partitions, actual_partitions); @@ -1191,7 +1195,9 @@ mod tests { ) -> Result<()> { let schema = csv_schema(); let generator = CsvBatchGenerator::new(batch_size, line_count); - let mut deserializer = csv_deserializer(batch_size, &schema); + + let schema_clone = Arc::clone(&schema); + let mut deserializer = csv_deserializer(batch_size, &schema_clone); for data in generator { deserializer.digest(data); @@ -1230,7 +1236,8 @@ mod tests { ) -> Result<()> { let schema = csv_schema(); let generator = CsvBatchGenerator::new(batch_size, line_count); - let mut deserializer = csv_deserializer(batch_size, &schema); + let schema_clone = Arc::clone(&schema); + let mut deserializer = csv_deserializer(batch_size, &schema_clone); for data in generator { deserializer.digest(data); @@ -1499,7 +1506,7 @@ mod tests { // Create a temp file with a .csv suffix so the reader accepts it let mut tmp = tempfile::Builder::new().suffix(".csv").tempfile()?; // ensures path ends with .csv - // CSV has header "a,b,c". First data row is truncated (only "1,2"), second row is complete. + // CSV has header "a,b,c". First data row is truncated (only "1,2"), second row is complete. 
write!(tmp, "a,b,c\n1,2\n3,4,5\n")?; let path = tmp.path().to_str().unwrap().to_string(); @@ -1529,4 +1536,32 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_infer_schema_with_zero_max_records() -> Result<()> { + let session_ctx = SessionContext::new(); + let state = session_ctx.state(); + + let root = format!("{}/csv", arrow_test_data()); + let format = CsvFormat::default() + .with_has_header(true) + .with_schema_infer_max_rec(0); // Set to 0 to disable inference + let exec = scan_format( + &state, + &format, + None, + &root, + "aggregate_test_100.csv", + None, + None, + ) + .await?; + + // related to https://github.com/apache/datafusion/issues/19417 + for f in exec.schema().fields() { + assert_eq!(*f.data_type(), DataType::Utf8); + } + + Ok(()) + } } diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 34d3d64f07fb2..4d5ed34399693 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -36,7 +36,7 @@ mod tests { BatchDeserializer, DecoderDeserializer, DeserializerOutput, }; use datafusion_datasource::file_format::FileFormat; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use arrow::compute::concat_batches; use arrow::datatypes::{DataType, Field}; @@ -187,11 +187,11 @@ mod tests { let re = Regex::new(r"file_groups=\{(\d+) group").unwrap(); - if let Some(captures) = re.captures(&plan) { - if let Some(match_) = captures.get(1) { - let count = match_.as_str().parse::().unwrap(); - return Ok(count); - } + if let Some(captures) = re.captures(&plan) + && let Some(match_) = captures.get(1) + { + let count = match_.as_str().parse::().unwrap(); + return Ok(count); } internal_err!("Query contains no Exec: file_groups") @@ -218,13 +218,13 @@ mod tests { let result = ctx.sql(query).await?.collect().await?; let actual_partitions = count_num_partitions(&ctx, query).await?; - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&result),@r###" - +----------------------+ - | sum(json_parallel.a) | - +----------------------+ - | -7 | - +----------------------+ - "###);} + insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&result),@r" + +----------------------+ + | sum(json_parallel.a) | + +----------------------+ + | -7 | + +----------------------+ + ");} assert_eq!(n_partitions, actual_partitions); @@ -249,10 +249,10 @@ mod tests { let result = ctx.sql(query).await?.collect().await?; - assert_snapshot!(batches_to_string(&result),@r###" - ++ - ++ - "###); + assert_snapshot!(batches_to_string(&result),@r" + ++ + ++ + "); Ok(()) } @@ -284,15 +284,15 @@ mod tests { } assert_eq!(deserializer.next()?, DeserializerOutput::InputExhausted); - assert_snapshot!(batches_to_string(&[all_batches]),@r###" - +----+----+----+----+----+ - | c1 | c2 | c3 | c4 | c5 | - +----+----+----+----+----+ - | 1 | 2 | 3 | 4 | 5 | - | 6 | 7 | 8 | 9 | 10 | - | 11 | 12 | 13 | 14 | 15 | - +----+----+----+----+----+ - "###); + assert_snapshot!(batches_to_string(&[all_batches]),@r" + +----+----+----+----+----+ + | c1 | c2 | c3 | c4 | c5 | + +----+----+----+----+----+ + | 1 | 2 | 3 | 4 | 5 | + | 6 | 7 | 8 | 9 | 10 | + | 11 | 12 | 13 | 14 | 15 | + +----+----+----+----+----+ + "); Ok(()) } @@ -324,14 +324,14 @@ mod tests { } assert_eq!(deserializer.next()?, DeserializerOutput::RequiresMoreData); - insta::assert_snapshot!(fmt_batches(&[all_batches]),@r###" - +----+----+----+----+----+ - | c1 | c2 | c3 | c4 | c5 | - +----+----+----+----+----+ - | 1 | 2 | 3 | 4 | 5 | - | 6 | 7 | 8 | 9 | 10 | - +----+----+----+----+----+ - "###); + insta::assert_snapshot!(fmt_batches(&[all_batches]),@r" + +----+----+----+----+----+ + | c1 | c2 | c3 | c4 | c5 | + +----+----+----+----+----+ + | 1 | 2 | 3 | 4 | 5 | + | 6 | 7 | 8 | 9 | 10 | + +----+----+----+----+----+ + "); Ok(()) } diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 4881783eeba69..6bbb63f6a17ad 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -39,8 +39,9 @@ pub(crate) mod test_util { use arrow_schema::SchemaRef; use datafusion_catalog::Session; use datafusion_common::Result; + use datafusion_datasource::TableSchema; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; - use datafusion_datasource::{file_format::FileFormat, PartitionedFile}; + use datafusion_datasource::{PartitionedFile, file_format::FileFormat}; use datafusion_execution::object_store::ObjectStoreUrl; use std::sync::Arc; @@ -66,31 +67,34 @@ pub(crate) mod test_util { .await? }; + let table_schema = TableSchema::new(file_schema.clone(), vec![]); + let statistics = format .infer_stats(state, &store, file_schema.clone(), &meta) .await?; - let file_groups = vec![vec![PartitionedFile { - object_meta: meta, - partition_values: vec![], - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }] - .into()]; + let file_groups = vec![ + vec![PartitionedFile { + object_meta: meta, + partition_values: vec![], + range: None, + statistics: None, + extensions: None, + metadata_size_hint: None, + }] + .into(), + ]; let exec = format .create_physical_plan( state, FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - format.file_source(), + format.file_source(table_schema), ) .with_file_groups(file_groups) .with_statistics(statistics) - .with_projection_indices(projection) + .with_projection_indices(projection)? 
.with_limit(limit) .build(), ) @@ -131,7 +135,10 @@ mod tests { .write_parquet(out_dir_url, DataFrameWriteOptions::new(), None) .await .expect_err("should fail because input file does not match inferred schema"); - assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'"); + assert_eq!( + e.strip_backtrace(), + "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'" + ); Ok(()) } } diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index e78c5f09553cc..146c5f6f5fd0f 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -25,9 +25,9 @@ use crate::datasource::file_format::avro::AvroFormat; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; +use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD; use crate::datasource::file_format::arrow::ArrowFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; -use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD; use crate::datasource::listing::ListingTableUrl; use crate::datasource::{file_format::csv::CsvFormat, listing::ListingOptions}; use crate::error::Result; @@ -523,6 +523,12 @@ impl<'a> NdJsonReadOptions<'a> { self.file_sort_order = file_sort_order; self } + + /// Specify how many rows to read for schema inference + pub fn schema_infer_max_records(mut self, schema_infer_max_records: usize) -> Self { + self.schema_infer_max_records = schema_infer_max_records; + self + } } #[async_trait] diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 52c5393e10319..44cf09c1ae46e 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -107,8 +107,8 @@ pub(crate) mod test_util { mod tests { use std::fmt::{self, Display, Formatter}; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use crate::datasource::file_format::parquet::test_util::store_parquet; @@ -120,6 +120,7 @@ mod tests { use arrow::array::RecordBatch; use arrow_schema::Schema; use datafusion_catalog::Session; + use datafusion_common::ScalarValue::Utf8; use datafusion_common::cast::{ as_binary_array, as_binary_view_array, as_boolean_array, as_float32_array, as_float64_array, as_int32_array, as_timestamp_nanosecond_array, @@ -127,7 +128,6 @@ mod tests { use datafusion_common::config::{ParquetOptions, TableParquetOptions}; use datafusion_common::stats::Precision; use datafusion_common::test_util::batches_to_string; - use datafusion_common::ScalarValue::Utf8; use datafusion_common::{Result, ScalarValue}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; @@ -135,33 +135,33 @@ mod tests { use datafusion_datasource_parquet::{ ParquetFormat, ParquetFormatFactory, ParquetSink, }; + use datafusion_execution::TaskContext; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::runtime_env::RuntimeEnv; - use datafusion_execution::TaskContext; use datafusion_expr::dml::InsertOp; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - use datafusion_physical_plan::{collect, 
ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use crate::test_util::bounded_stream; use arrow::array::{ - types::Int32Type, Array, ArrayRef, DictionaryArray, Int32Array, Int64Array, - StringArray, + Array, ArrayRef, DictionaryArray, Int32Array, Int64Array, StringArray, + types::Int32Type, }; use arrow::datatypes::{DataType, Field}; use async_trait::async_trait; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource_parquet::metadata::DFParquetMetadata; - use futures::stream::BoxStream; use futures::StreamExt; + use futures::stream::BoxStream; use insta::assert_snapshot; - use object_store::local::LocalFileSystem; use object_store::ObjectMeta; + use object_store::local::LocalFileSystem; use object_store::{ - path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, + GetOptions, GetResult, ListResult, MultipartUpload, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, path::Path, }; - use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::ParquetRecordBatchStreamBuilder; + use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::file::metadata::{ KeyValue, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex, }; @@ -724,7 +724,7 @@ mod tests { // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( exec.partition_statistics(None)?.total_byte_size, - Precision::Exact(671) + Precision::Absent, ); Ok(()) @@ -770,10 +770,9 @@ mod tests { exec.partition_statistics(None)?.num_rows, Precision::Exact(8) ); - // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( exec.partition_statistics(None)?.total_byte_size, - Precision::Exact(671) + Precision::Absent, ); let batches = collect(exec, task_ctx).await?; assert_eq!(1, batches.len()); @@ -931,7 +930,10 @@ mod tests { values.push(array.value(i)); } - assert_eq!("[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", format!("{values:?}")); + assert_eq!( + "[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", + format!("{values:?}") + ); Ok(()) } @@ -1204,10 +1206,10 @@ mod tests { let result = df.collect().await?; - assert_snapshot!(batches_to_string(&result), @r###" - ++ - ++ - "###); + assert_snapshot!(batches_to_string(&result), @r" + ++ + ++ + "); Ok(()) } @@ -1233,10 +1235,10 @@ mod tests { let result = df.collect().await?; - assert_snapshot!(batches_to_string(&result), @r###" - ++ - ++ - "###); + assert_snapshot!(batches_to_string(&result), @r" + ++ + ++ + "); Ok(()) } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 3333b70676203..93d77e10ba23c 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -113,8 +113,8 @@ mod tests { use crate::prelude::*; use crate::{ datasource::{ - file_format::csv::CsvFormat, file_format::json::JsonFormat, - provider_as_source, DefaultTableSource, MemTable, + DefaultTableSource, MemTable, file_format::csv::CsvFormat, + file_format::json::JsonFormat, provider_as_source, }, execution::options::ArrowReadOptions, test::{ @@ -129,33 +129,26 @@ mod tests { ListingOptions, ListingTable, ListingTableConfig, SchemaSource, 
}; use datafusion_common::{ - assert_contains, plan_err, + DataFusionError, Result, ScalarValue, assert_contains, stats::Precision, test_util::{batches_to_string, datafusion_test_data}, - ColumnStatistics, DataFusionError, Result, ScalarValue, }; + use datafusion_datasource::ListingTableUrl; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::FileFormat; - use datafusion_datasource::schema_adapter::{ - SchemaAdapter, SchemaAdapterFactory, SchemaMapper, - }; - use datafusion_datasource::ListingTableUrl; use datafusion_expr::dml::InsertOp; use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; - use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::{collect, ExecutionPlanProperties}; - use rstest::rstest; + use datafusion_physical_plan::{ExecutionPlanProperties, collect}; use std::collections::HashMap; use std::io::Write; use std::sync::Arc; use tempfile::TempDir; use url::Url; - const DUMMY_NULL_COUNT: Precision = Precision::Exact(42); - /// Creates a test schema with standard field types used in tests fn create_test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ @@ -257,7 +250,7 @@ mod tests { ); assert_eq!( exec.partition_statistics(None)?.total_byte_size, - Precision::Exact(671) + Precision::Absent, ); Ok(()) @@ -289,32 +282,36 @@ mod tests { // sort expr, but non column ( vec![vec![col("int_col").add(lit(1)).sort(true, true)]], - Ok(vec![[PhysicalSortExpr { - expr: binary( - physical_col("int_col", &schema).unwrap(), - Operator::Plus, - physical_lit(1), - &schema, - ) - .unwrap(), - options: SortOptions { - descending: false, - nulls_first: true, - }, - }] - .into()]), + Ok(vec![ + [PhysicalSortExpr { + expr: binary( + physical_col("int_col", &schema).unwrap(), + Operator::Plus, + physical_lit(1), + &schema, + ) + .unwrap(), + options: SortOptions { + descending: false, + nulls_first: true, + }, + }] + .into(), + ]), ), // ok with one column ( vec![vec![col("string_col").sort(true, false)]], - Ok(vec![[PhysicalSortExpr { - expr: physical_col("string_col", &schema).unwrap(), - options: SortOptions { - descending: false, - nulls_first: false, - }, - }] - .into()]), + Ok(vec![ + [PhysicalSortExpr { + expr: physical_col("string_col", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: false, + }, + }] + .into(), + ]), ), // ok with two columns, different options ( @@ -322,19 +319,21 @@ mod tests { col("string_col").sort(true, false), col("int_col").sort(false, true), ]], - Ok(vec![[ - PhysicalSortExpr::new_default( - physical_col("string_col", &schema).unwrap(), - ) - .asc() - .nulls_last(), - PhysicalSortExpr::new_default( - physical_col("int_col", &schema).unwrap(), - ) - .desc() - .nulls_first(), - ] - .into()]), + Ok(vec![ + [ + PhysicalSortExpr::new_default( + physical_col("string_col", &schema).unwrap(), + ) + .asc() + .nulls_last(), + PhysicalSortExpr::new_default( + physical_col("int_col", &schema).unwrap(), + ) + .desc() + .nulls_first(), + ] + .into(), + ]), ), ]; @@ -453,9 +452,9 @@ mod tests { let table = ListingTable::try_new(config)?; - let (file_list, _) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + let result = table.list_files_for_scan(&ctx.state(), &[], None).await?; - assert_eq!(file_list.len(), output_partitioning); + 
assert_eq!(result.file_groups.len(), output_partitioning); Ok(()) } @@ -488,9 +487,9 @@ mod tests { let table = ListingTable::try_new(config)?; - let (file_list, _) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + let result = table.list_files_for_scan(&ctx.state(), &[], None).await?; - assert_eq!(file_list.len(), output_partitioning); + assert_eq!(result.file_groups.len(), output_partitioning); Ok(()) } @@ -538,9 +537,9 @@ mod tests { let table = ListingTable::try_new(config)?; - let (file_list, _) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + let result = table.list_files_for_scan(&ctx.state(), &[], None).await?; - assert_eq!(file_list.len(), output_partitioning); + assert_eq!(result.file_groups.len(), output_partitioning); Ok(()) } @@ -731,8 +730,8 @@ mod tests { } #[tokio::test] - async fn test_insert_into_append_new_parquet_files_invalid_session_fails( - ) -> Result<()> { + async fn test_insert_into_append_new_parquet_files_invalid_session_fails() + -> Result<()> { let mut config_map: HashMap = HashMap::new(); config_map.insert( "datafusion.execution.parquet.compression".into(), @@ -746,7 +745,10 @@ mod tests { ) .await .expect_err("Example should fail!"); - assert_eq!(e.strip_backtrace(), "Invalid or Unsupported Configuration: zstd compression requires specifying a level such as zstd(4)"); + assert_eq!( + e.strip_backtrace(), + "Invalid or Unsupported Configuration: zstd compression requires specifying a level such as zstd(4)" + ); Ok(()) } @@ -873,13 +875,13 @@ mod tests { let res = collect(plan, session_ctx.task_ctx()).await?; // Insert returns the number of rows written, in our case this would be 6. - insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&res),@r###" - +-------+ - | count | - +-------+ - | 20 | - +-------+ - "###);} + insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&res),@r" + +-------+ + | count | + +-------+ + | 20 | + +-------+ + ");} // Read the records in the table let batches = session_ctx @@ -888,13 +890,13 @@ mod tests { .collect() .await?; - insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&batches),@r###" - +-------+ - | count | - +-------+ - | 20 | - +-------+ - "###);} + insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&batches),@r" + +-------+ + | count | + +-------+ + | 20 | + +-------+ + ");} // Assert that `target_partition_number` many files were added to the table. let num_files = tmp_dir.path().read_dir()?.count(); @@ -909,13 +911,13 @@ mod tests { // Again, execute the physical plan and collect the results let res = collect(plan, session_ctx.task_ctx()).await?; - insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&res),@r###" - +-------+ - | count | - +-------+ - | 20 | - +-------+ - "###);} + insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&res),@r" + +-------+ + | count | + +-------+ + | 20 | + +-------+ + ");} // Read the contents of the table let batches = session_ctx @@ -924,13 +926,13 @@ mod tests { .collect() .await?; - insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&batches),@r###" - +-------+ - | count | - +-------+ - | 40 | - +-------+ - "###);} + insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&batches),@r" + +-------+ + | count | + +-------+ + | 40 | + +-------+ + ");} // Assert that another `target_partition_number` many files were added to the table. 
let num_files = tmp_dir.path().read_dir()?.count(); @@ -988,15 +990,15 @@ mod tests { .collect() .await?; - insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&batches),@r###" - +-----+-----+---+ - | a | b | c | - +-----+-----+---+ - | foo | bar | 1 | - | foo | bar | 2 | - | foo | bar | 3 | - +-----+-----+---+ - "###);} + insta::allow_duplicates! {insta::assert_snapshot!(batches_to_string(&batches),@r" + +-----+-----+---+ + | a | b | c | + +-----+-----+---+ + | foo | bar | 1 | + | foo | bar | 2 | + | foo | bar | 3 | + +-----+-----+---+ + ");} Ok(()) } @@ -1307,10 +1309,10 @@ mod tests { let table = ListingTable::try_new(config)?; - let (file_list, _) = table.list_files_for_scan(&ctx.state(), &[], None).await?; - assert_eq!(file_list.len(), 1); + let result = table.list_files_for_scan(&ctx.state(), &[], None).await?; + assert_eq!(result.file_groups.len(), 1); - let files = file_list[0].clone(); + let files = result.file_groups[0].clone(); assert_eq!( files @@ -1397,7 +1399,7 @@ mod tests { // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( exec_enabled.partition_statistics(None)?.total_byte_size, - Precision::Exact(671) + Precision::Absent, ); Ok(()) @@ -1416,7 +1418,9 @@ mod tests { ]; for (format, batch_size, soft_max_rows, expected_files) in test_cases { - println!("Testing insert with format: {format}, batch_size: {batch_size}, expected files: {expected_files}"); + println!( + "Testing insert with format: {format}, batch_size: {batch_size}, expected files: {expected_files}" + ); let mut config_map = HashMap::new(); config_map.insert( @@ -1449,33 +1453,10 @@ mod tests { } #[tokio::test] - async fn test_statistics_mapping_with_custom_factory() -> Result<()> { + async fn test_basic_table_scan() -> Result<()> { let ctx = SessionContext::new(); - let table = create_test_listing_table_with_json_and_adapter( - &ctx, - false, - // NullStatsAdapterFactory sets column_statistics null_count to DUMMY_NULL_COUNT - Arc::new(NullStatsAdapterFactory {}), - )?; - let (groups, stats) = table.list_files_for_scan(&ctx.state(), &[], None).await?; - - assert_eq!(stats.column_statistics[0].null_count, DUMMY_NULL_COUNT); - for g in groups { - if let Some(s) = g.file_statistics(None) { - assert_eq!(s.column_statistics[0].null_count, DUMMY_NULL_COUNT); - } - } - - Ok(()) - } - - #[tokio::test] - async fn test_statistics_mapping_with_default_factory() -> Result<()> { - let ctx = SessionContext::new(); - - // Create a table without providing a custom schema adapter factory - // This should fall back to using DefaultSchemaAdapterFactory + // Test basic table creation and scanning let path = "table/file.json"; register_test_store(&ctx, &[(path, 10)]); @@ -1487,222 +1468,20 @@ mod tests { let config = ListingTableConfig::new(table_path) .with_listing_options(opt) .with_schema(Arc::new(schema)); - // Note: NOT calling .with_schema_adapter_factory() to test default behavior let table = ListingTable::try_new(config)?; - // Verify that no custom schema adapter factory is set - assert!(table.schema_adapter_factory().is_none()); - - // The scan should work correctly with the default schema adapter + // The scan should work correctly let scan_result = table.scan(&ctx.state(), None, &[], None).await; - assert!( - scan_result.is_ok(), - "Scan should succeed with default schema adapter" - ); + assert!(scan_result.is_ok(), "Scan should succeed"); - // Verify that the default adapter handles basic schema compatibility - let (groups, _stats) = 
table.list_files_for_scan(&ctx.state(), &[], None).await?; + // Verify file listing works + let result = table.list_files_for_scan(&ctx.state(), &[], None).await?; assert!( - !groups.is_empty(), - "Should list files successfully with default adapter" + !result.file_groups.is_empty(), + "Should list files successfully" ); Ok(()) } - - #[rstest] - #[case(MapSchemaError::TypeIncompatible, "Cannot map incompatible types")] - #[case(MapSchemaError::GeneralFailure, "Schema adapter mapping failed")] - #[case( - MapSchemaError::InvalidProjection, - "Invalid projection in schema mapping" - )] - #[tokio::test] - async fn test_schema_adapter_map_schema_errors( - #[case] error_type: MapSchemaError, - #[case] expected_error_msg: &str, - ) -> Result<()> { - let ctx = SessionContext::new(); - let table = create_test_listing_table_with_json_and_adapter( - &ctx, - false, - Arc::new(FailingMapSchemaAdapterFactory { error_type }), - )?; - - // The error should bubble up from the scan operation when schema mapping fails - let scan_result = table.scan(&ctx.state(), None, &[], None).await; - - assert!(scan_result.is_err()); - let error_msg = scan_result.unwrap_err().to_string(); - assert!( - error_msg.contains(expected_error_msg), - "Expected error containing '{expected_error_msg}', got: {error_msg}" - ); - - Ok(()) - } - - // Test that errors during file listing also bubble up correctly - #[tokio::test] - async fn test_schema_adapter_error_during_file_listing() -> Result<()> { - let ctx = SessionContext::new(); - let table = create_test_listing_table_with_json_and_adapter( - &ctx, - true, - Arc::new(FailingMapSchemaAdapterFactory { - error_type: MapSchemaError::TypeIncompatible, - }), - )?; - - // The error should bubble up from list_files_for_scan when collecting statistics - let list_result = table.list_files_for_scan(&ctx.state(), &[], None).await; - - assert!(list_result.is_err()); - let error_msg = list_result.unwrap_err().to_string(); - assert!( - error_msg.contains("Cannot map incompatible types"), - "Expected type incompatibility error during file listing, got: {error_msg}" - ); - - Ok(()) - } - - #[derive(Debug, Copy, Clone)] - enum MapSchemaError { - TypeIncompatible, - GeneralFailure, - InvalidProjection, - } - - #[derive(Debug)] - struct FailingMapSchemaAdapterFactory { - error_type: MapSchemaError, - } - - impl SchemaAdapterFactory for FailingMapSchemaAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(FailingMapSchemaAdapter { - schema: projected_table_schema, - error_type: self.error_type, - }) - } - } - - #[derive(Debug)] - struct FailingMapSchemaAdapter { - schema: SchemaRef, - error_type: MapSchemaError, - } - - impl SchemaAdapter for FailingMapSchemaAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.schema.field(index); - file_schema.fields.find(field.name()).map(|(i, _)| i) - } - - fn map_schema( - &self, - _file_schema: &Schema, - ) -> Result<(Arc, Vec)> { - // Always fail with different error types based on the configured error_type - match self.error_type { - MapSchemaError::TypeIncompatible => { - plan_err!( - "Cannot map incompatible types: Boolean cannot be cast to Utf8" - ) - } - MapSchemaError::GeneralFailure => { - plan_err!("Schema adapter mapping failed due to internal error") - } - MapSchemaError::InvalidProjection => { - plan_err!("Invalid projection in schema mapping: column index out of bounds") - } - } - } - } - - #[derive(Debug)] - struct 
NullStatsAdapterFactory; - - impl SchemaAdapterFactory for NullStatsAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(NullStatsAdapter { - schema: projected_table_schema, - }) - } - } - - #[derive(Debug)] - struct NullStatsAdapter { - schema: SchemaRef, - } - - impl SchemaAdapter for NullStatsAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.schema.field(index); - file_schema.fields.find(field.name()).map(|(i, _)| i) - } - - fn map_schema( - &self, - file_schema: &Schema, - ) -> Result<(Arc, Vec)> { - let projection = (0..file_schema.fields().len()).collect(); - Ok((Arc::new(NullStatsMapper {}), projection)) - } - } - - #[derive(Debug)] - struct NullStatsMapper; - - impl SchemaMapper for NullStatsMapper { - fn map_batch(&self, batch: RecordBatch) -> Result { - Ok(batch) - } - - fn map_column_statistics( - &self, - stats: &[ColumnStatistics], - ) -> Result> { - Ok(stats - .iter() - .map(|s| { - let mut s = s.clone(); - s.null_count = DUMMY_NULL_COUNT; - s - }) - .collect()) - } - } - - /// Helper function to create a test ListingTable with JSON format and custom schema adapter factory - fn create_test_listing_table_with_json_and_adapter( - ctx: &SessionContext, - collect_stat: bool, - schema_adapter_factory: Arc, - ) -> Result { - let path = "table/file.json"; - register_test_store(ctx, &[(path, 10)]); - - let format = JsonFormat::default(); - let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(collect_stat); - let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse("test:///table/")?; - - let config = ListingTableConfig::new(table_path) - .with_listing_options(opt) - .with_schema(Arc::new(schema)) - .with_schema_adapter_factory(schema_adapter_factory); - - ListingTable::try_new(config) - } } diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index f98297d0e3f7f..3ca388af0c4c1 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -28,8 +28,8 @@ use crate::datasource::listing::{ use crate::execution::context::SessionState; use arrow::datatypes::DataType; -use datafusion_common::{arrow_datafusion_err, plan_err, DataFusionError, ToDFSchema}; -use datafusion_common::{config_datafusion_err, Result}; +use datafusion_common::{Result, config_datafusion_err}; +use datafusion_common::{ToDFSchema, arrow_datafusion_err, plan_err}; use datafusion_expr::CreateExternalTable; use async_trait::async_trait; @@ -190,6 +190,16 @@ impl TableProviderFactory for ListingTableFactory { .with_definition(cmd.definition.clone()) .with_constraints(cmd.constraints.clone()) .with_column_defaults(cmd.column_defaults.clone()); + + // Pre-warm statistics cache if collect_statistics is enabled + if session_state.config().collect_statistics() { + let filters = &[]; + let limit = None; + if let Err(e) = table.list_files_for_scan(state, filters, limit).await { + log::warn!("Failed to pre-warm statistics cache: {e}"); + } + } + Ok(Arc::new(table)) } } @@ -205,19 +215,23 @@ fn get_extension(path: &str) -> String { #[cfg(test)] mod tests { + use super::*; + use crate::{ + datasource::file_format::csv::CsvFormat, execution::context::SessionContext, + test_util::parquet_test_data, + }; + use datafusion_execution::cache::CacheAccessor; + use 
datafusion_execution::cache::cache_manager::CacheManagerConfig; + use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use glob::Pattern; use std::collections::HashMap; use std::fs; use std::path::PathBuf; - use super::*; - use crate::{ - datasource::file_format::csv::CsvFormat, execution::context::SessionContext, - }; - use datafusion_common::parsers::CompressionTypeVariant; - use datafusion_common::{Constraints, DFSchema, TableReference}; + use datafusion_common::{DFSchema, TableReference}; #[tokio::test] async fn test_create_using_non_std_file_ext() { @@ -231,22 +245,14 @@ mod tests { let context = SessionContext::new(); let state = context.state(); let name = TableReference::bare("foo"); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: csv_file.path().to_str().unwrap().to_string(), - file_type: "csv".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options: HashMap::from([("format.has_header".into(), "true".into())]), - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + csv_file.path().to_str().unwrap().to_string(), + "csv", + Arc::new(DFSchema::empty()), + ) + .with_options(HashMap::from([("format.has_header".into(), "true".into())])) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -272,22 +278,14 @@ mod tests { let mut options = HashMap::new(); options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned()); options.insert("format.has_header".into(), "true".into()); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: csv_file.path().to_str().unwrap().to_string(), - file_type: "csv".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options, - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + csv_file.path().to_str().unwrap().to_string(), + "csv", + Arc::new(DFSchema::empty()), + ) + .with_options(options) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -317,22 +315,14 @@ mod tests { options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned()); options.insert("format.has_header".into(), "true".into()); options.insert("format.compression".into(), "gzip".into()); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: dir.path().to_str().unwrap().to_string(), - file_type: "csv".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options, - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + dir.path().to_str().unwrap().to_string(), + "csv", + Arc::new(DFSchema::empty()), + ) + .with_options(options) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -369,22 +359,14 @@ mod tests { let mut options = HashMap::new(); 
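The tests in this file now build `CreateExternalTable` through a builder instead of spelling out every struct field. A sketch of registering a CSV file that way, assuming the `builder(name, location, file_type, schema)` / `with_options` / `build` shape used throughout this diff; the facade import paths and the helper name are assumptions:

```rust
use std::collections::HashMap;
use std::sync::Arc;

use datafusion::catalog::TableProviderFactory;
use datafusion::datasource::listing_table_factory::ListingTableFactory;
use datafusion::prelude::SessionContext;
use datafusion_common::{DFSchema, Result, TableReference};
use datafusion_expr::CreateExternalTable;

async fn create_csv_table(path: &str) -> Result<()> {
    let factory = ListingTableFactory::new();
    let ctx = SessionContext::new();
    let state = ctx.state();

    // Builder form used by the updated tests: name, location, file type and
    // schema are required; everything else keeps its default unless set.
    let cmd = CreateExternalTable::builder(
        TableReference::bare("foo"),
        path.to_string(),
        "csv",
        Arc::new(DFSchema::empty()),
    )
    .with_options(HashMap::from([("format.has_header".into(), "true".into())]))
    .build();

    let _table = factory.create(&state, &cmd).await?;
    Ok(())
}
```

Fields not set on the builder (partition columns, constraints, column defaults, ordering and so on) keep their defaults, which is what removes the boilerplate from the tests above.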
options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned()); options.insert("format.has_header".into(), "true".into()); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: dir.path().to_str().unwrap().to_string(), - file_type: "csv".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options, - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + dir.path().to_str().unwrap().to_string(), + "csv", + Arc::new(DFSchema::empty()), + ) + .with_options(options) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -413,22 +395,13 @@ mod tests { let state = context.state(); let name = TableReference::bare("foo"); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: String::from(path.to_str().unwrap()), - file_type: "parquet".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options: HashMap::new(), - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + String::from(path.to_str().unwrap()), + "parquet", + Arc::new(DFSchema::empty()), + ) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -453,22 +426,13 @@ mod tests { let state = context.state(); let name = TableReference::bare("foo"); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: dir.path().to_str().unwrap().to_string(), - file_type: "parquet".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options: HashMap::new(), - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + dir.path().to_str().unwrap(), + "parquet", + Arc::new(DFSchema::empty()), + ) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -494,22 +458,13 @@ mod tests { let state = context.state(); let name = TableReference::bare("foo"); - let cmd = CreateExternalTable { + let cmd = CreateExternalTable::builder( name, - location: dir.path().to_str().unwrap().to_string(), - file_type: "parquet".to_string(), - schema: Arc::new(DFSchema::empty()), - table_partition_cols: vec![], - if_not_exists: false, - or_replace: false, - temporary: false, - definition: None, - order_exprs: vec![], - unbounded: false, - options: HashMap::new(), - constraints: Constraints::default(), - column_defaults: HashMap::new(), - }; + dir.path().to_str().unwrap().to_string(), + "parquet", + Arc::new(DFSchema::empty()), + ) + .build(); let table_provider = factory.create(&state, &cmd).await.unwrap(); let listing_table = table_provider .as_any() @@ -519,4 +474,75 @@ mod tests { let listing_options = listing_table.options(); assert!(listing_options.table_partition_cols.is_empty()); } + + #[tokio::test] + async fn test_statistics_cache_prewarming() { + let factory = ListingTableFactory::new(); + + let location = PathBuf::from(parquet_test_data()) + .join("alltypes_tiny_pages_plain.parquet") + 
.to_string_lossy() + .to_string(); + + // Test with collect_statistics enabled + let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); + let cache_config = CacheManagerConfig::default() + .with_files_statistics_cache(Some(file_statistics_cache.clone())); + let runtime = RuntimeEnvBuilder::new() + .with_cache_manager(cache_config) + .build_arc() + .unwrap(); + + let mut config = SessionConfig::new(); + config.options_mut().execution.collect_statistics = true; + let context = SessionContext::new_with_config_rt(config, runtime); + let state = context.state(); + let name = TableReference::bare("test"); + + let cmd = CreateExternalTable::builder( + name, + location.clone(), + "parquet", + Arc::new(DFSchema::empty()), + ) + .build(); + + let _table_provider = factory.create(&state, &cmd).await.unwrap(); + + assert!( + file_statistics_cache.len() > 0, + "Statistics cache should be pre-warmed when collect_statistics is enabled" + ); + + // Test with collect_statistics disabled + let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); + let cache_config = CacheManagerConfig::default() + .with_files_statistics_cache(Some(file_statistics_cache.clone())); + let runtime = RuntimeEnvBuilder::new() + .with_cache_manager(cache_config) + .build_arc() + .unwrap(); + + let mut config = SessionConfig::new(); + config.options_mut().execution.collect_statistics = false; + let context = SessionContext::new_with_config_rt(config, runtime); + let state = context.state(); + let name = TableReference::bare("test"); + + let cmd = CreateExternalTable::builder( + name, + location, + "parquet", + Arc::new(DFSchema::empty()), + ) + .build(); + + let _table_provider = factory.create(&state, &cmd).await.unwrap(); + + assert_eq!( + file_statistics_cache.len(), + 0, + "Statistics cache should not be pre-warmed when collect_statistics is disabled" + ); + } } diff --git a/datafusion/core/src/datasource/memory_test.rs b/datafusion/core/src/datasource/memory_test.rs index c16837c73b4f1..c7721cafb02ea 100644 --- a/datafusion/core/src/datasource/memory_test.rs +++ b/datafusion/core/src/datasource/memory_test.rs @@ -19,7 +19,7 @@ mod tests { use crate::datasource::MemTable; - use crate::datasource::{provider_as_source, DefaultTableSource}; + use crate::datasource::{DefaultTableSource, provider_as_source}; use crate::physical_plan::collect; use crate::prelude::SessionContext; use arrow::array::{AsArray, Int32Array}; @@ -29,8 +29,8 @@ mod tests { use arrow_schema::SchemaRef; use datafusion_catalog::TableProvider; use datafusion_common::{DataFusionError, Result}; - use datafusion_expr::dml::InsertOp; use datafusion_expr::LogicalPlanBuilder; + use datafusion_expr::dml::InsertOp; use futures::StreamExt; use std::collections::HashMap; use std::sync::Arc; @@ -329,12 +329,11 @@ mod tests { ); let col = batch.column(0).as_primitive::(); assert_eq!(col.len(), 1, "expected 1 row, got {}", col.len()); - let val = col - .iter() + + col.iter() .next() .expect("had value") - .expect("expected non null"); - val + .expect("expected non null") } // Test inserting a single batch of data into a single partition diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 37b9663111a53..aefda64d39367 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -31,7 +31,7 @@ mod view_test; // backwards compatibility pub use self::default_table_source::{ - provider_as_source, source_as_provider, DefaultTableSource, + DefaultTableSource, 
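`test_statistics_cache_prewarming` makes the pre-warm observable by installing an explicit file statistics cache in the runtime. A condensed sketch of that wiring, using the same types the test imports:

```rust
use std::sync::Arc;

use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_execution::cache::cache_manager::CacheManagerConfig;
use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache;
use datafusion_execution::runtime_env::RuntimeEnvBuilder;

fn context_with_stats_cache() -> (SessionContext, Arc<DefaultFileStatisticsCache>) {
    // A shared statistics cache we can inspect after table creation.
    let stats_cache = Arc::new(DefaultFileStatisticsCache::default());
    let cache_config = CacheManagerConfig::default()
        .with_files_statistics_cache(Some(stats_cache.clone()));
    let runtime = RuntimeEnvBuilder::new()
        .with_cache_manager(cache_config)
        .build_arc()
        .expect("runtime");

    // Pre-warming only happens when statistics collection is enabled.
    let mut config = SessionConfig::new();
    config.options_mut().execution.collect_statistics = true;

    (SessionContext::new_with_config_rt(config, runtime), stats_cache)
}
```

After `factory.create(...)` (or an equivalent `CREATE EXTERNAL TABLE`), a non-zero `stats_cache.len()` indicates the cache was populated eagerly; calling `len` requires the `CacheAccessor` trait in scope, as in the test's imports.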
provider_as_source, source_as_provider, }; pub use self::memory::MemTable; pub use self::view::ViewTable; @@ -53,32 +53,34 @@ pub use datafusion_physical_expr::create_ordering; mod tests { use crate::prelude::SessionContext; - use ::object_store::{path::Path, ObjectMeta}; + use ::object_store::{ObjectMeta, path::Path}; use arrow::{ - array::{Int32Array, StringArray}, + array::Int32Array, datatypes::{DataType, Field, Schema, SchemaRef}, record_batch::RecordBatch, }; - use datafusion_common::{record_batch, test_util::batches_to_sort_string}; + use datafusion_common::{ + Result, ScalarValue, + test_util::batches_to_sort_string, + tree_node::{Transformed, TransformedResult, TreeNode}, + }; use datafusion_datasource::{ - file::FileSource, - file_scan_config::FileScanConfigBuilder, - schema_adapter::{ - DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, - SchemaMapper, - }, - source::DataSourceExec, - PartitionedFile, + PartitionedFile, file_scan_config::FileScanConfigBuilder, source::DataSourceExec, }; use datafusion_datasource_parquet::source::ParquetSource; + use datafusion_physical_expr::expressions::{Column, Literal}; + use datafusion_physical_expr_adapter::{ + PhysicalExprAdapter, PhysicalExprAdapterFactory, + }; + use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_plan::collect; use std::{fs, sync::Arc}; use tempfile::TempDir; #[tokio::test] - async fn can_override_schema_adapter() { - // Test shows that SchemaAdapter can add a column that doesn't existing in the - // record batches returned from parquet. This can be useful for schema evolution + async fn can_override_physical_expr_adapter() { + // Test shows that PhysicalExprAdapter can add a column that doesn't exist in the + // record batches returned from parquet. This can be useful for schema evolution // where older files may not have all columns. 
use datafusion_execution::object_store::ObjectStoreUrl; @@ -124,16 +126,12 @@ mod tests { let f2 = Field::new("extra_column", DataType::Utf8, true); let schema = Arc::new(Schema::new(vec![f1.clone(), f2.clone()])); - let source = ParquetSource::default() - .with_schema_adapter_factory(Arc::new(TestSchemaAdapterFactory {})) - .unwrap(); - let base_conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema, - source, - ) - .with_file(partitioned_file) - .build(); + let source = Arc::new(ParquetSource::new(Arc::clone(&schema))); + let base_conf = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file(partitioned_file) + .with_expr_adapter(Some(Arc::new(TestPhysicalExprAdapterFactory))) + .build(); let parquet_exec = DataSourceExec::from_data_source(base_conf); @@ -141,134 +139,52 @@ mod tests { let task_ctx = session_ctx.task_ctx(); let read = collect(parquet_exec, task_ctx).await.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&read),@r###" + insta::assert_snapshot!(batches_to_sort_string(&read),@r" +----+--------------+ | id | extra_column | +----+--------------+ | 1 | foo | +----+--------------+ - "###); - } - - #[test] - fn default_schema_adapter() { - let table_schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Utf8, true), - ]); - - // file has a subset of the table schema fields and different type - let file_schema = Schema::new(vec![ - Field::new("c", DataType::Float64, true), // not in table schema - Field::new("b", DataType::Float64, true), - ]); - - let adapter = DefaultSchemaAdapterFactory::from_schema(Arc::new(table_schema)); - let (mapper, indices) = adapter.map_schema(&file_schema).unwrap(); - assert_eq!(indices, vec![1]); - - let file_batch = record_batch!(("b", Float64, vec![1.0, 2.0])).unwrap(); - - let mapped_batch = mapper.map_batch(file_batch).unwrap(); - - // the mapped batch has the correct schema and the "b" column has been cast to Utf8 - let expected_batch = record_batch!( - ("a", Int32, vec![None, None]), // missing column filled with nulls - ("b", Utf8, vec!["1.0", "2.0"]) // b was cast to string and order was changed - ) - .unwrap(); - assert_eq!(mapped_batch, expected_batch); - } - - #[test] - fn default_schema_adapter_non_nullable_columns() { - let table_schema = Schema::new(vec![ - Field::new("a", DataType::Int32, false), // "a"" is declared non nullable - Field::new("b", DataType::Utf8, true), - ]); - let file_schema = Schema::new(vec![ - // since file doesn't have "a" it will be filled with nulls - Field::new("b", DataType::Float64, true), - ]); - - let adapter = DefaultSchemaAdapterFactory::from_schema(Arc::new(table_schema)); - let (mapper, indices) = adapter.map_schema(&file_schema).unwrap(); - assert_eq!(indices, vec![0]); - - let file_batch = record_batch!(("b", Float64, vec![1.0, 2.0])).unwrap(); - - // Mapping fails because it tries to fill in a non-nullable column with nulls - let err = mapper.map_batch(file_batch).unwrap_err().to_string(); - assert!(err.contains("Invalid argument error: Column 'a' is declared as non-nullable but contains null values"), "{err}"); + "); } #[derive(Debug)] - struct TestSchemaAdapterFactory; + struct TestPhysicalExprAdapterFactory; - impl SchemaAdapterFactory for TestSchemaAdapterFactory { + impl PhysicalExprAdapterFactory for TestPhysicalExprAdapterFactory { fn create( &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(TestSchemaAdapter { - table_schema: 
projected_table_schema, + _logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + Arc::new(TestPhysicalExprAdapter { + physical_file_schema, }) } } - struct TestSchemaAdapter { - /// Schema for the table - table_schema: SchemaRef, + #[derive(Debug)] + struct TestPhysicalExprAdapter { + physical_file_schema: SchemaRef, } - impl SchemaAdapter for TestSchemaAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.table_schema.field(index); - Some(file_schema.fields.find(field.name())?.0) - } - - fn map_schema( - &self, - file_schema: &Schema, - ) -> datafusion_common::Result<(Arc, Vec)> { - let mut projection = Vec::with_capacity(file_schema.fields().len()); - - for (file_idx, file_field) in file_schema.fields.iter().enumerate() { - if self.table_schema.fields().find(file_field.name()).is_some() { - projection.push(file_idx); + impl PhysicalExprAdapter for TestPhysicalExprAdapter { + fn rewrite(&self, expr: Arc) -> Result> { + expr.transform(|e| { + if let Some(column) = e.as_any().downcast_ref::() { + // If column is "extra_column" and missing from physical schema, inject "foo" + if column.name() == "extra_column" + && self.physical_file_schema.index_of("extra_column").is_err() + { + return Ok(Transformed::yes(Arc::new(Literal::new( + ScalarValue::Utf8(Some("foo".to_string())), + )) + as Arc)); + } } - } - - Ok((Arc::new(TestSchemaMapping {}), projection)) - } - } - - #[derive(Debug)] - struct TestSchemaMapping {} - - impl SchemaMapper for TestSchemaMapping { - fn map_batch( - &self, - batch: RecordBatch, - ) -> datafusion_common::Result { - let f1 = Field::new("id", DataType::Int32, true); - let f2 = Field::new("extra_column", DataType::Utf8, true); - - let schema = Arc::new(Schema::new(vec![f1, f2])); - - let extra_column = Arc::new(StringArray::from(vec!["foo"])); - let mut new_columns = batch.columns().to_vec(); - new_columns.push(extra_column); - - Ok(RecordBatch::try_new(schema, new_columns).unwrap()) - } - - fn map_column_statistics( - &self, - _file_col_statistics: &[datafusion_common::ColumnStatistics], - ) -> datafusion_common::Result> { - unimplemented!() + Ok(Transformed::no(e)) + }) + .data() } } } diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 9068c9758179d..2954a47403299 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -31,21 +31,21 @@ mod tests { use crate::test::object_store::local_unpartitioned_file; use arrow::datatypes::{DataType, Field, SchemaBuilder}; use datafusion_common::test_util::batches_to_string; - use datafusion_common::{test_util, Result, ScalarValue}; + use datafusion_common::{Result, ScalarValue, test_util}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; - use datafusion_datasource::PartitionedFile; - use datafusion_datasource_avro::source::AvroSource; + use datafusion_datasource::{PartitionedFile, TableSchema}; use datafusion_datasource_avro::AvroFormat; + use datafusion_datasource_avro::source::AvroSource; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_physical_plan::ExecutionPlan; use datafusion_datasource::source::DataSourceExec; use futures::StreamExt; use insta::assert_snapshot; + use object_store::ObjectStore; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; - use object_store::ObjectStore; 
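The rewritten test above swaps the removed `SchemaAdapter` machinery for a `PhysicalExprAdapter` that rewrites references to a column missing from the file into a literal. A sketch of a slightly more general adapter/factory pair, assuming the trait shapes shown above (the generic parameters elided in this hunk are taken to be `dyn PhysicalExpr` and `dyn PhysicalExprAdapter`); the struct names and the parameterisation by column name and value are mine:

```rust
use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_common::{Result, ScalarValue};
use datafusion_physical_expr::expressions::{Column, Literal};
use datafusion_physical_expr_adapter::{PhysicalExprAdapter, PhysicalExprAdapterFactory};
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;

/// Fills a column that is absent from the physical (file) schema with a
/// constant, mirroring the test adapter above.
#[derive(Debug)]
struct FillMissingWithLiteral {
    physical_file_schema: SchemaRef,
    column: String,
    value: ScalarValue,
}

impl PhysicalExprAdapter for FillMissingWithLiteral {
    fn rewrite(&self, expr: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
        expr.transform(|e| {
            if let Some(col) = e.as_any().downcast_ref::<Column>() {
                if col.name() == self.column.as_str()
                    && self.physical_file_schema.index_of(self.column.as_str()).is_err()
                {
                    // Replace the missing column with a literal default value.
                    return Ok(Transformed::yes(Arc::new(Literal::new(
                        self.value.clone(),
                    )) as Arc<dyn PhysicalExpr>));
                }
            }
            Ok(Transformed::no(e))
        })
        .data()
    }
}

#[derive(Debug)]
struct FillMissingFactory {
    column: String,
    value: ScalarValue,
}

impl PhysicalExprAdapterFactory for FillMissingFactory {
    fn create(
        &self,
        _logical_file_schema: SchemaRef,
        physical_file_schema: SchemaRef,
    ) -> Arc<dyn PhysicalExprAdapter> {
        Arc::new(FillMissingWithLiteral {
            physical_file_schema,
            column: self.column.clone(),
            value: self.value.clone(),
        })
    }
}
```

As in the test, the factory is attached to the scan with `FileScanConfigBuilder::...with_expr_adapter(Some(Arc::new(...)))`, so the rewrite runs against each file's physical schema.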
use rstest::*; use url::Url; @@ -81,15 +81,11 @@ mod tests { .infer_schema(&state, &store, std::slice::from_ref(&meta)) .await?; - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file(meta.into()) - .with_projection_indices(Some(vec![0, 1, 2])) - .build(); + let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file(meta.into()) + .with_projection_indices(Some(vec![0, 1, 2]))? + .build(); let source_exec = DataSourceExec::from_data_source(conf); assert_eq!( @@ -109,20 +105,20 @@ mod tests { .expect("plan iterator empty") .expect("plan iterator returned an error"); - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch]), @r###" - +----+----------+-------------+ - | id | bool_col | tinyint_col | - +----+----------+-------------+ - | 4 | true | 0 | - | 5 | false | 1 | - | 6 | true | 0 | - | 7 | false | 1 | - | 2 | true | 0 | - | 3 | false | 1 | - | 0 | true | 0 | - | 1 | false | 1 | - +----+----------+-------------+ - "###);} + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch]), @r" + +----+----------+-------------+ + | id | bool_col | tinyint_col | + +----+----------+-------------+ + | 4 | true | 0 | + | 5 | false | 1 | + | 6 | true | 0 | + | 7 | false | 1 | + | 2 | true | 0 | + | 3 | false | 1 | + | 0 | true | 0 | + | 1 | false | 1 | + +----+----------+-------------+ + ");} let batch = results.next().await; assert!(batch.is_none()); @@ -157,10 +153,10 @@ mod tests { // Include the missing column in the projection let projection = Some(vec![0, 1, 2, actual_schema.fields().len()]); - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file(meta.into()) - .with_projection_indices(projection) + .with_projection_indices(projection)? .build(); let source_exec = DataSourceExec::from_data_source(conf); @@ -182,20 +178,20 @@ mod tests { .expect("plan iterator empty") .expect("plan iterator returned an error"); - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch]), @r###" - +----+----------+-------------+-------------+ - | id | bool_col | tinyint_col | missing_col | - +----+----------+-------------+-------------+ - | 4 | true | 0 | | - | 5 | false | 1 | | - | 6 | true | 0 | | - | 7 | false | 1 | | - | 2 | true | 0 | | - | 3 | false | 1 | | - | 0 | true | 0 | | - | 1 | false | 1 | | - +----+----------+-------------+-------------+ - "###);} + insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&[batch]), @r" + +----+----------+-------------+-------------+ + | id | bool_col | tinyint_col | missing_col | + +----+----------+-------------+-------------+ + | 4 | true | 0 | | + | 5 | false | 1 | | + | 6 | true | 0 | | + | 7 | false | 1 | | + | 2 | true | 0 | | + | 3 | false | 1 | | + | 0 | true | 0 | | + | 1 | false | 1 | | + +----+----------+-------------+-------------+ + ");} let batch = results.next().await; assert!(batch.is_none()); @@ -227,13 +223,16 @@ mod tests { partitioned_file.partition_values = vec![ScalarValue::from("2021-10-26")]; let projection = Some(vec![0, 1, file_schema.fields().len(), 2]); - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let table_schema = TableSchema::new( + file_schema.clone(), + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); + let source = Arc::new(AvroSource::new(table_schema.clone())); + let conf = FileScanConfigBuilder::new(object_store_url, source) // select specific columns of the files as well as the partitioning // column which is supposed to be the last column in the table schema. - .with_projection_indices(projection) + .with_projection_indices(projection)? .with_file(partitioned_file) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) .build(); let source_exec = DataSourceExec::from_data_source(conf); @@ -256,20 +255,20 @@ mod tests { .expect("plan iterator empty") .expect("plan iterator returned an error"); - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch]), @r###" - +----+----------+------------+-------------+ - | id | bool_col | date | tinyint_col | - +----+----------+------------+-------------+ - | 4 | true | 2021-10-26 | 0 | - | 5 | false | 2021-10-26 | 1 | - | 6 | true | 2021-10-26 | 0 | - | 7 | false | 2021-10-26 | 1 | - | 2 | true | 2021-10-26 | 0 | - | 3 | false | 2021-10-26 | 1 | - | 0 | true | 2021-10-26 | 0 | - | 1 | false | 2021-10-26 | 1 | - +----+----------+------------+-------------+ - "###);} + insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&[batch]), @r" + +----+----------+------------+-------------+ + | id | bool_col | date | tinyint_col | + +----+----------+------------+-------------+ + | 4 | true | 2021-10-26 | 0 | + | 5 | false | 2021-10-26 | 1 | + | 6 | true | 2021-10-26 | 0 | + | 7 | false | 2021-10-26 | 1 | + | 2 | true | 2021-10-26 | 0 | + | 3 | false | 2021-10-26 | 1 | + | 0 | true | 2021-10-26 | 0 | + | 1 | false | 2021-10-26 | 1 | + +----+----------+------------+-------------+ + ");} let batch = results.next().await; assert!(batch.is_none()); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 4f46a57d8b137..0e40ed2df2066 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -29,18 +29,21 @@ mod tests { use std::io::Write; use std::sync::Arc; + use datafusion_datasource::TableSchema; use datafusion_datasource_csv::CsvFormat; use object_store::ObjectStore; + use crate::datasource::file_format::FileFormat; use crate::prelude::CsvReadOptions; use crate::prelude::SessionContext; use crate::test::partitioned_file_groups; + use datafusion_common::config::CsvOptions; use datafusion_common::test_util::arrow_test_data; use datafusion_common::test_util::batches_to_string; - use datafusion_common::{assert_batches_eq, Result}; + use datafusion_common::{Result, assert_batches_eq}; use datafusion_execution::config::SessionConfig; - use datafusion_physical_plan::metrics::MetricsSet; use datafusion_physical_plan::ExecutionPlan; + use datafusion_physical_plan::metrics::MetricsSet; #[cfg(feature = "compression")] use datafusion_datasource::file_compression_type::FileCompressionType; @@ -94,32 +97,39 @@ mod tests { async fn csv_exec_with_projection( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema(); let path = format!("{}/csv", arrow_test_data()); let filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; + let csv_format: Arc = Arc::new(CsvFormat::default()); let file_groups = partitioned_file_groups( path.as_str(), filename, 1, - Arc::new(CsvFormat::default()), + &csv_format, file_compression_type.to_owned(), tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_file_compression_type(file_compression_type) - .with_newlines_in_values(false) - .with_projection_indices(Some(vec![0, 2, 4])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) + .with_file_compression_type(file_compression_type) + .with_projection_indices(Some(vec![0, 2, 4]))? + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); @@ -131,17 +141,17 @@ mod tests { assert_eq!(3, batch.num_columns()); assert_eq!(100, batch.num_rows()); - insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&[batch.slice(0, 5)]), @r###" - +----+-----+------------+ - | c1 | c3 | c5 | - +----+-----+------------+ - | c | 1 | 2033001162 | - | d | -40 | 706441268 | - | b | 29 | 994303988 | - | a | -85 | 1171968280 | - | b | -82 | 1824882165 | - +----+-----+------------+ - "###);} + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch.slice(0, 5)]), @r" + +----+-----+------------+ + | c1 | c3 | c5 | + +----+-----+------------+ + | c | 1 | 2033001162 | + | d | -40 | 706441268 | + | b | 29 | 994303988 | + | a | -85 | 1171968280 | + | b | -82 | 1824882165 | + +----+-----+------------+ + ");} Ok(()) } @@ -158,6 +168,8 @@ mod tests { async fn csv_exec_with_mixed_order_projection( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); let session_ctx = SessionContext::new_with_config(cfg); let task_ctx = session_ctx.task_ctx(); @@ -165,26 +177,31 @@ mod tests { let path = format!("{}/csv", arrow_test_data()); let filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; + let csv_format: Arc = Arc::new(CsvFormat::default()); let file_groups = partitioned_file_groups( path.as_str(), filename, 1, - Arc::new(CsvFormat::default()), + &csv_format, file_compression_type.to_owned(), tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_projection_indices(Some(vec![4, 0, 2])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) + .with_file_compression_type(file_compression_type.to_owned()) + .with_projection_indices(Some(vec![4, 0, 2]))? + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(3, csv.schema().fields().len()); @@ -194,17 +211,17 @@ mod tests { assert_eq!(3, batch.num_columns()); assert_eq!(100, batch.num_rows()); - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch.slice(0, 5)]), @r###" - +------------+----+-----+ - | c5 | c1 | c3 | - +------------+----+-----+ - | 2033001162 | c | 1 | - | 706441268 | d | -40 | - | 994303988 | b | 29 | - | 1171968280 | a | -85 | - | 1824882165 | b | -82 | - +------------+----+-----+ - "###);} + insta::allow_duplicates! 
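These CSV tests now construct the source from a `TableSchema` plus a `CsvOptions` value instead of the old positional `CsvSource::new(true, b',', b'"')`. A sketch of that construction on its own, assuming the `datafusion_datasource_*` module paths used in these tests and a schema with at least five fields (such as the 13-column aggregate test schema) so the projection indices are valid; file groups would still be added via `with_file_groups` or `with_file` as the tests do:

```rust
use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion_common::config::CsvOptions;
use datafusion_common::Result;
use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
use datafusion_datasource::source::DataSourceExec;
use datafusion_datasource::TableSchema;
use datafusion_datasource_csv::source::CsvSource;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_physical_plan::ExecutionPlan;

fn csv_scan_sketch(file_schema: SchemaRef) -> Result<Arc<dyn ExecutionPlan>> {
    // Header, delimiter and quote now travel in CsvOptions rather than as
    // positional arguments to CsvSource::new.
    let options = CsvOptions {
        has_header: Some(true),
        delimiter: b',',
        quote: b'"',
        ..Default::default()
    };

    // With no partition columns, the table schema is derived from the file schema.
    let table_schema = TableSchema::from_file_schema(file_schema);
    let source = Arc::new(CsvSource::new(table_schema).with_csv_options(options));

    // The builder no longer takes the schema separately; it comes from the
    // source. Projection indices are validated eagerly, hence the `?`.
    let config = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source)
        .with_projection_indices(Some(vec![0, 2, 4]))?
        .build();
    Ok(DataSourceExec::from_data_source(config))
}
```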
{assert_snapshot!(batches_to_string(&[batch.slice(0, 5)]), @r" + +------------+----+-----+ + | c5 | c1 | c3 | + +------------+----+-----+ + | 2033001162 | c | 1 | + | 706441268 | d | -40 | + | 994303988 | b | 29 | + | 1171968280 | a | -85 | + | 1824882165 | b | -82 | + +------------+----+-----+ + ");} Ok(()) } @@ -221,6 +238,7 @@ mod tests { async fn csv_exec_with_limit( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; use futures::StreamExt; let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); @@ -230,26 +248,31 @@ mod tests { let path = format!("{}/csv", arrow_test_data()); let filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; + let csv_format: Arc = Arc::new(CsvFormat::default()); let file_groups = partitioned_file_groups( path.as_str(), filename, 1, - Arc::new(CsvFormat::default()), + &csv_format, file_compression_type.to_owned(), tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_limit(Some(5)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) + .with_file_compression_type(file_compression_type.to_owned()) + .with_limit(Some(5)) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(13, csv.schema().fields().len()); @@ -259,17 +282,17 @@ mod tests { assert_eq!(13, batch.num_columns()); assert_eq!(5, batch.num_rows()); - insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&[batch]), @r###" - +----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+ - | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 | c10 | c11 | c12 | c13 | - +----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+ - | c | 2 | 1 | 18109 | 2033001162 | -6513304855495910254 | 25 | 43062 | 1491205016 | 5863949479783605708 | 0.110830784 | 0.9294097332465232 | 6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW | - | d | 5 | -40 | 22614 | 706441268 | -7542719935673075327 | 155 | 14337 | 3373581039 | 11720144131976083864 | 0.69632107 | 0.3114712539863804 | C2GT5KVyOPZpgKVl110TyZO0NcJ434 | - | b | 1 | 29 | -18218 | 994303988 | 5983957848665088916 | 204 | 9489 | 3275293996 | 14857091259186476033 | 0.53840446 | 0.17909035118828576 | AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz | - | a | 1 | -85 | -15154 | 1171968280 | 1919439543497968449 | 77 | 52286 | 774637006 | 12101411955859039553 | 0.12285209 | 0.6864391962767343 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB | - | b | 5 | -82 | 22080 | 1824882165 | 7373730676428214987 | 208 | 34331 | 3342719438 | 3330177516592499461 | 0.82634634 | 0.40975383525297016 | Ig1QcuKsjHXkproePdERo2w0mYzIqd | - +----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+ - "###);} + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch]), @r" + +----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+ + | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 | c10 | c11 | c12 | c13 | + +----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+ + | c | 2 | 1 | 18109 | 2033001162 | -6513304855495910254 | 25 | 43062 | 1491205016 | 5863949479783605708 | 0.110830784 | 0.9294097332465232 | 6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW | + | d | 5 | -40 | 22614 | 706441268 | -7542719935673075327 | 155 | 14337 | 3373581039 | 11720144131976083864 | 0.69632107 | 0.3114712539863804 | C2GT5KVyOPZpgKVl110TyZO0NcJ434 | + | b | 1 | 29 | -18218 | 994303988 | 5983957848665088916 | 204 | 9489 | 3275293996 | 14857091259186476033 | 0.53840446 | 0.17909035118828576 | AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz | + | a | 1 | -85 | -15154 | 1171968280 | 1919439543497968449 | 77 | 52286 | 774637006 | 12101411955859039553 | 0.12285209 | 0.6864391962767343 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB | + | b | 5 | -82 | 22080 | 1824882165 | 7373730676428214987 | 208 | 34331 | 3342719438 | 3330177516592499461 | 0.82634634 | 0.40975383525297016 | Ig1QcuKsjHXkproePdERo2w0mYzIqd | + +----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+ + ");} Ok(()) } @@ -287,32 +310,39 @@ mod tests { async fn csv_exec_with_missing_column( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema_with_missing_col(); let path = format!("{}/csv", arrow_test_data()); let 
filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; + let csv_format: Arc = Arc::new(CsvFormat::default()); let file_groups = partitioned_file_groups( path.as_str(), filename, 1, - Arc::new(CsvFormat::default()), + &csv_format, file_compression_type.to_owned(), tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_limit(Some(5)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) + .with_file_compression_type(file_compression_type.to_owned()) + .with_limit(Some(5)) + .build(); assert_eq!(14, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(14, csv.schema().fields().len()); @@ -341,6 +371,7 @@ mod tests { file_compression_type: FileCompressionType, ) -> Result<()> { use datafusion_common::ScalarValue; + use datafusion_datasource::TableSchema; let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); @@ -348,12 +379,13 @@ mod tests { let path = format!("{}/csv", arrow_test_data()); let filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; + let csv_format: Arc = Arc::new(CsvFormat::default()); let mut file_groups = partitioned_file_groups( path.as_str(), filename, 1, - Arc::new(CsvFormat::default()), + &csv_format, file_compression_type.to_owned(), tmp_dir.path(), )?; @@ -362,19 +394,25 @@ mod tests { let num_file_schema_fields = file_schema.fields().len(); - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) - // We should be able to project on the partition column - // Which is supposed to be after the file fields - .with_projection_indices(Some(vec![0, num_file_schema_fields])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::new( + Arc::clone(&file_schema), + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) + .with_file_compression_type(file_compression_type.to_owned()) + // We should be able to project on the partition column + // Which is supposed to be after the file fields + .with_projection_indices(Some(vec![0, num_file_schema_fields]))? + .build(); // we don't have `/date=xx/` in the path but that is ok because // partitions are resolved during scan anyway @@ -388,17 +426,17 @@ mod tests { assert_eq!(2, batch.num_columns()); assert_eq!(100, batch.num_rows()); - insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&[batch.slice(0, 5)]), @r###" - +----+------------+ - | c1 | date | - +----+------------+ - | c | 2021-10-26 | - | d | 2021-10-26 | - | b | 2021-10-26 | - | a | 2021-10-26 | - | b | 2021-10-26 | - +----+------------+ - "###);} + insta::allow_duplicates! {assert_snapshot!(batches_to_string(&[batch.slice(0, 5)]), @r" + +----+------------+ + | c1 | date | + +----+------------+ + | c | 2021-10-26 | + | d | 2021-10-26 | + | b | 2021-10-26 | + | a | 2021-10-26 | + | b | 2021-10-26 | + +----+------------+ + ");} let metrics = csv.metrics().expect("doesn't found metrics"); let time_elapsed_processing = get_value(&metrics, "time_elapsed_processing"); @@ -452,26 +490,31 @@ mod tests { let path = format!("{}/csv", arrow_test_data()); let filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; + let csv_format: Arc = Arc::new(CsvFormat::default()); let file_groups = partitioned_file_groups( path.as_str(), filename, 1, - Arc::new(CsvFormat::default()), + &csv_format, file_compression_type.to_owned(), tmp_dir.path(), ) .unwrap(); - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); let csv = DataSourceExec::from_data_source(config); let it = csv.execute(0, task_ctx).unwrap(); @@ -527,14 +570,14 @@ mod tests { let result = df.collect().await.unwrap(); - assert_snapshot!(batches_to_string(&result), @r###" - +---+---+ - | a | b | - +---+---+ - | 1 | 2 | - | 3 | 4 | - +---+---+ - "###); + assert_snapshot!(batches_to_string(&result), @r" + +---+---+ + | a | b | + +---+---+ + | 1 | 2 | + | 3 | 4 | + +---+---+ + "); } #[tokio::test] @@ -556,14 +599,14 @@ mod tests { let result = df.collect().await.unwrap(); - assert_snapshot!(batches_to_string(&result),@r###" - +---+---+ - | a | b | - +---+---+ - | 1 | 2 | - | 3 | 4 | - +---+---+ - "###); + assert_snapshot!(batches_to_string(&result),@r" + +---+---+ + | a | b | + +---+---+ + | 1 | 2 | + | 3 | 4 | + +---+---+ + "); let e = session_ctx .read_csv("memory:///", CsvReadOptions::new().terminator(Some(b'\n'))) @@ -572,7 +615,10 @@ mod tests { .collect() .await .unwrap_err(); - assert_eq!(e.strip_backtrace(), "Arrow error: Csv error: incorrect number of fields for line 1, expected 2 got more than 2") + assert_eq!( + e.strip_backtrace(), + "Arrow error: Csv error: incorrect number of fields for line 1, expected 2 got more than 2" + ) } #[tokio::test] @@ -593,22 +639,22 @@ mod tests { .await?; let df = ctx.sql(r#"select * from t1"#).await?.collect().await?; - assert_snapshot!(batches_to_string(&df),@r###" - +------+--------+ - | col1 | col2 | - +------+--------+ - | id0 | value0 | - | id1 | value1 | - | id2 | value2 | - | id3 | value3 | - +------+--------+ - "###); + assert_snapshot!(batches_to_string(&df),@r" + +------+--------+ + | col1 | col2 | + +------+--------+ + | id0 | value0 | + | id1 | value1 | + | id2 | value2 | + | id3 | value3 | + 
+------+--------+ + "); Ok(()) } #[tokio::test] - async fn test_create_external_table_with_terminator_with_newlines_in_values( - ) -> Result<()> { + async fn test_create_external_table_with_terminator_with_newlines_in_values() + -> Result<()> { let ctx = SessionContext::new(); ctx.sql(r#" CREATE EXTERNAL TABLE t1 ( @@ -658,7 +704,10 @@ mod tests { ) .await .expect_err("should fail because input file does not match inferred schema"); - assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'"); + assert_eq!( + e.strip_backtrace(), + "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'" + ); Ok(()) } diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index f7d5c710bf48a..8de6a60258f08 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -34,9 +34,9 @@ mod tests { use crate::execution::SessionState; use crate::prelude::{CsvReadOptions, NdJsonReadOptions, SessionContext}; use crate::test::partitioned_file_groups; + use datafusion_common::Result; use datafusion_common::cast::{as_int32_array, as_int64_array, as_string_array}; use datafusion_common::test_util::batches_to_string; - use datafusion_common::Result; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource_json::JsonFormat; @@ -51,9 +51,9 @@ mod tests { use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use insta::assert_snapshot; + use object_store::ObjectStore; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; - use object_store::ObjectStore; use rstest::*; use tempfile::TempDir; use url::Url; @@ -69,11 +69,13 @@ mod tests { let store = state.runtime_env().object_store(&store_url).unwrap(); let filename = "1.json"; + let json_format: Arc = Arc::new(JsonFormat::default()); + let file_groups = partitioned_file_groups( TEST_DATA_BASE, filename, 1, - Arc::new(JsonFormat::default()), + &json_format, file_compression_type.to_owned(), work_dir, ) @@ -104,11 +106,13 @@ mod tests { ctx.register_object_store(&url, store.clone()); let filename = "1.json"; let tmp_dir = TempDir::new()?; + let json_format: Arc = Arc::new(JsonFormat::default()); + let file_groups = partitioned_file_groups( TEST_DATA_BASE, filename, 1, - Arc::new(JsonFormat::default()), + &json_format, file_compression_type.to_owned(), tmp_dir.path(), ) @@ -138,16 +142,16 @@ mod tests { let frame = ctx.read_json(path, read_options).await.unwrap(); let results = frame.collect().await.unwrap(); - insta::allow_duplicates! {assert_snapshot!(batches_to_string(&results), @r###" - +-----+------------------+---------------+------+ - | a | b | c | d | - +-----+------------------+---------------+------+ - | 1 | [2.0, 1.3, -6.1] | [false, true] | 4 | - | -10 | [2.0, 1.3, -6.1] | [true, true] | 4 | - | 2 | [2.0, , -6.1] | [false, ] | text | - | | | | | - +-----+------------------+---------------+------+ - "###);} + insta::allow_duplicates! 
{assert_snapshot!(batches_to_string(&results), @r" + +-----+------------------+---------------+------+ + | a | b | c | d | + +-----+------------------+---------------+------+ + | 1 | [2.0, 1.3, -6.1] | [false, true] | 4 | + | -10 | [2.0, 1.3, -6.1] | [true, true] | 4 | + | 2 | [2.0, , -6.1] | [false, ] | text | + | | | | | + +-----+------------------+---------------+------+ + ");} Ok(()) } @@ -176,8 +180,8 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_limit(Some(3)) .with_file_compression_type(file_compression_type.to_owned()) @@ -251,8 +255,8 @@ mod tests { let file_schema = Arc::new(builder.finish()); let missing_field_idx = file_schema.fields.len() - 1; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_limit(Some(3)) .with_file_compression_type(file_compression_type.to_owned()) @@ -294,10 +298,11 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_projection_indices(Some(vec![0, 2])) + .unwrap() .with_file_compression_type(file_compression_type.to_owned()) .build(); let exec = DataSourceExec::from_data_source(conf); @@ -342,10 +347,10 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) - .with_projection_indices(Some(vec![3, 0, 2])) + .with_projection_indices(Some(vec![3, 0, 2]))? .with_file_compression_type(file_compression_type.to_owned()) .build(); let exec = DataSourceExec::from_data_source(conf); @@ -494,7 +499,10 @@ mod tests { .write_json(out_dir_url, DataFrameWriteOptions::new(), None) .await .expect_err("should fail because input file does not match inferred schema"); - assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'"); + assert_eq!( + e.strip_backtrace(), + "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. 
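The JSON tests follow the same shape: the source owns the schema (`JsonSource::new(file_schema)`) and the builder only carries file groups, projection, limit and compression. A sketch assuming inputs like those returned by the `prepare_store` helper above; module paths are assumptions:

```rust
use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion_common::Result;
use datafusion_datasource::file_compression_type::FileCompressionType;
use datafusion_datasource::file_groups::FileGroup;
use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
use datafusion_datasource::source::DataSourceExec;
use datafusion_datasource_json::source::JsonSource;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_physical_plan::ExecutionPlan;

fn json_scan_sketch(
    object_store_url: ObjectStoreUrl,
    file_groups: Vec<FileGroup>,
    file_schema: SchemaRef,
) -> Result<Arc<dyn ExecutionPlan>> {
    // The schema lives on the source now.
    let source = Arc::new(JsonSource::new(Arc::clone(&file_schema)));
    let conf = FileScanConfigBuilder::new(object_store_url, source)
        .with_file_groups(file_groups)
        // Projection indices are validated eagerly, hence the `?`.
        .with_projection_indices(Some(vec![0, 2]))?
        .with_limit(Some(3))
        .with_file_compression_type(FileCompressionType::UNCOMPRESSED)
        .build();
    Ok(DataSourceExec::from_data_source(conf))
}
```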
Row data: '[d,4]'" + ); Ok(()) } diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 1ac292e260fdf..04c8ea129d05c 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -43,146 +43,11 @@ pub use datafusion_datasource::file::FileSource; pub use datafusion_datasource::file_groups::FileGroup; pub use datafusion_datasource::file_groups::FileGroupPartitioner; pub use datafusion_datasource::file_scan_config::{ - wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig, - FileScanConfigBuilder, + FileScanConfig, FileScanConfigBuilder, wrap_partition_type_in_dict, + wrap_partition_value_in_dict, }; pub use datafusion_datasource::file_sink_config::*; pub use datafusion_datasource::file_stream::{ FileOpenFuture, FileOpener, FileStream, OnError, }; - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::array::{ - cast::AsArray, - types::{Float32Type, Float64Type, UInt32Type}, - BinaryArray, BooleanArray, Float32Array, Int32Array, Int64Array, RecordBatch, - StringArray, UInt64Array, - }; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_schema::SchemaRef; - - use crate::datasource::schema_adapter::{ - DefaultSchemaAdapterFactory, SchemaAdapterFactory, - }; - - #[test] - fn schema_mapping_map_batch() { - let table_schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, true), - Field::new("c2", DataType::UInt32, true), - Field::new("c3", DataType::Float64, true), - ])); - - let adapter = DefaultSchemaAdapterFactory - .create(table_schema.clone(), table_schema.clone()); - - let file_schema = Schema::new(vec![ - Field::new("c1", DataType::Utf8, true), - Field::new("c2", DataType::UInt64, true), - Field::new("c3", DataType::Float32, true), - ]); - - let (mapping, _) = adapter.map_schema(&file_schema).expect("map schema failed"); - - let c1 = StringArray::from(vec!["hello", "world"]); - let c2 = UInt64Array::from(vec![9_u64, 5_u64]); - let c3 = Float32Array::from(vec![2.0_f32, 7.0_f32]); - let batch = RecordBatch::try_new( - Arc::new(file_schema), - vec![Arc::new(c1), Arc::new(c2), Arc::new(c3)], - ) - .unwrap(); - - let mapped_batch = mapping.map_batch(batch).unwrap(); - - assert_eq!(mapped_batch.schema(), table_schema); - assert_eq!(mapped_batch.num_columns(), 3); - assert_eq!(mapped_batch.num_rows(), 2); - - let c1 = mapped_batch.column(0).as_string::(); - let c2 = mapped_batch.column(1).as_primitive::(); - let c3 = mapped_batch.column(2).as_primitive::(); - - assert_eq!(c1.value(0), "hello"); - assert_eq!(c1.value(1), "world"); - assert_eq!(c2.value(0), 9_u32); - assert_eq!(c2.value(1), 5_u32); - assert_eq!(c3.value(0), 2.0_f64); - assert_eq!(c3.value(1), 7.0_f64); - } - - #[test] - fn schema_adapter_map_schema_with_projection() { - let table_schema = Arc::new(Schema::new(vec![ - Field::new("c0", DataType::Utf8, true), - Field::new("c1", DataType::Utf8, true), - Field::new("c2", DataType::Float64, true), - Field::new("c3", DataType::Int32, true), - Field::new("c4", DataType::Float32, true), - ])); - - let file_schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("c1", DataType::Boolean, true), - Field::new("c2", DataType::Float32, true), - Field::new("c3", DataType::Binary, true), - Field::new("c4", DataType::Int64, true), - ]); - - let indices = vec![1, 2, 4]; - let schema = SchemaRef::from(table_schema.project(&indices).unwrap()); - let adapter = 
DefaultSchemaAdapterFactory.create(schema, table_schema.clone()); - let (mapping, projection) = adapter.map_schema(&file_schema).unwrap(); - - let id = Int32Array::from(vec![Some(1), Some(2), Some(3)]); - let c1 = BooleanArray::from(vec![Some(true), Some(false), Some(true)]); - let c2 = Float32Array::from(vec![Some(2.0_f32), Some(7.0_f32), Some(3.0_f32)]); - let c3 = BinaryArray::from_opt_vec(vec![ - Some(b"hallo"), - Some(b"danke"), - Some(b"super"), - ]); - let c4 = Int64Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new( - Arc::new(file_schema), - vec![ - Arc::new(id), - Arc::new(c1), - Arc::new(c2), - Arc::new(c3), - Arc::new(c4), - ], - ) - .unwrap(); - let rows_num = batch.num_rows(); - let projected = batch.project(&projection).unwrap(); - let mapped_batch = mapping.map_batch(projected).unwrap(); - - assert_eq!( - mapped_batch.schema(), - Arc::new(table_schema.project(&indices).unwrap()) - ); - assert_eq!(mapped_batch.num_columns(), indices.len()); - assert_eq!(mapped_batch.num_rows(), rows_num); - - let c1 = mapped_batch.column(0).as_string::(); - let c2 = mapped_batch.column(1).as_primitive::(); - let c4 = mapped_batch.column(2).as_primitive::(); - - assert_eq!(c1.value(0), "true"); - assert_eq!(c1.value(1), "false"); - assert_eq!(c1.value(2), "true"); - - assert_eq!(c2.value(0), 2.0_f64); - assert_eq!(c2.value(1), 7.0_f64); - assert_eq!(c2.value(2), 3.0_f64); - - assert_eq!(c4.value(0), 1.0_f32); - assert_eq!(c4.value(1), 2.0_f32); - assert_eq!(c4.value(2), 3.0_f32); - } -} diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 0ffb252a66052..4703b55ecc0de 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -38,7 +38,7 @@ mod tests { use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext}; use crate::test::object_store::local_unpartitioned_file; use arrow::array::{ - ArrayRef, AsArray, Date64Array, Int32Array, Int64Array, Int8Array, StringArray, + ArrayRef, AsArray, Date64Array, Int8Array, Int32Array, Int64Array, StringArray, StringViewArray, StructArray, TimestampNanosecondArray, }; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaBuilder}; @@ -48,7 +48,7 @@ mod tests { use bytes::{BufMut, BytesMut}; use datafusion_common::config::TableParquetOptions; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; - use datafusion_common::{assert_contains, Result, ScalarValue}; + use datafusion_common::{Result, ScalarValue, assert_contains}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; @@ -60,7 +60,7 @@ mod tests { DefaultParquetFileReaderFactory, ParquetFileReaderFactory, ParquetFormat, }; use datafusion_execution::object_store::ObjectStoreUrl; - use datafusion_expr::{col, lit, when, Expr}; + use datafusion_expr::{Expr, col, lit, when}; use datafusion_physical_expr::planner::logical2physical; use datafusion_physical_plan::analyze::AnalyzeExec; use datafusion_physical_plan::collect; @@ -161,7 +161,7 @@ mod tests { .as_ref() .map(|p| logical2physical(p, &table_schema)); - let mut source = ParquetSource::default(); + let mut source = ParquetSource::new(table_schema); if let Some(predicate) = predicate { source = source.with_predicate(predicate); } @@ -186,23 +186,20 @@ mod tests { source = source.with_bloom_filter_on_read(false); } - 
source.with_schema(TableSchema::new(Arc::clone(&table_schema), vec![])) + Arc::new(source) } fn build_parquet_exec( &self, - file_schema: SchemaRef, file_group: FileGroup, source: Arc, ) -> Arc { - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file_group(file_group) - .with_projection_indices(self.projection.clone()) - .build(); + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file_group(file_group) + .with_projection_indices(self.projection.clone()) + .unwrap() + .build(); DataSourceExec::from_data_source(base_config) } @@ -231,11 +228,8 @@ mod tests { // build a ParquetExec to return the results let parquet_source = self.build_file_source(Arc::clone(table_schema)); - let parquet_exec = self.build_parquet_exec( - Arc::clone(table_schema), - file_group.clone(), - Arc::clone(&parquet_source), - ); + let parquet_exec = + self.build_parquet_exec(file_group.clone(), Arc::clone(&parquet_source)); let analyze_exec = Arc::new(AnalyzeExec::new( false, @@ -243,7 +237,6 @@ mod tests { vec![MetricType::SUMMARY, MetricType::DEV], // use a new ParquetSource to avoid sharing execution metrics self.build_parquet_exec( - Arc::clone(table_schema), file_group.clone(), self.build_file_source(Arc::clone(table_schema)), ), @@ -313,7 +306,7 @@ mod tests { let batch = RecordBatch::try_new(file_schema.clone(), vec![c1]).unwrap(); - // Since c2 is missing from the file and we didn't supply a custom `SchemaAdapterFactory`, + // Since c2 is missing from the file and we didn't supply a custom `PhysicalExprAdapterFactory`, // the default behavior is to fill in missing columns with nulls. // Thus this predicate will come back as false. let filter = col("c2").eq(lit(1_i32)); @@ -344,13 +337,13 @@ mod tests { .await; let batches = rt.batches.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&batches),@r###" + insta::assert_snapshot!(batches_to_sort_string(&batches),@r" +----+----+ | c1 | c2 | +----+----+ | 1 | | +----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); let metric = get_value(&metrics, "pushdown_rows_pruned"); @@ -371,7 +364,7 @@ mod tests { let batch = RecordBatch::try_new(file_schema.clone(), vec![c1]).unwrap(); - // Since c2 is missing from the file and we didn't supply a custom `SchemaAdapterFactory`, + // Since c2 is missing from the file and we didn't supply a custom `PhysicalExprAdapterFactory`, // the default behavior is to fill in missing columns with nulls. // Thus this predicate will come back as false. let filter = col("c2").eq(lit("abc")); @@ -402,13 +395,13 @@ mod tests { .await; let batches = rt.batches.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&batches),@r###" + insta::assert_snapshot!(batches_to_sort_string(&batches),@r" +----+----+ | c1 | c2 | +----+----+ | 1 | | +----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); let metric = get_value(&metrics, "pushdown_rows_pruned"); @@ -433,7 +426,7 @@ mod tests { let batch = RecordBatch::try_new(file_schema.clone(), vec![c1, c3]).unwrap(); - // Since c2 is missing from the file and we didn't supply a custom `SchemaAdapterFactory`, + // Since c2 is missing from the file and we didn't supply a custom `PhysicalExprAdapterFactory`, // the default behavior is to fill in missing columns with nulls. // Thus this predicate will come back as false. 
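For Parquet the pattern is the same, with the optional predicate attached to the source before the exec is built; and as the updated comments in this file note, without a custom `PhysicalExprAdapterFactory` a column missing from the file is filled with nulls, so predicates on it evaluate against null. A sketch of building such a scan, assuming the constructor and builder shapes used in this file; the function name and module paths are mine:

```rust
use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion_common::Result;
use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
use datafusion_datasource::source::DataSourceExec;
use datafusion_datasource::PartitionedFile;
use datafusion_datasource_parquet::source::ParquetSource;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
use datafusion_physical_plan::ExecutionPlan;

fn parquet_scan_sketch(
    table_schema: SchemaRef,
    file: PartitionedFile,
    predicate: Option<Arc<dyn PhysicalExpr>>,
) -> Result<Arc<dyn ExecutionPlan>> {
    // ParquetSource now takes the table schema up front; the predicate is optional.
    let mut source = ParquetSource::new(table_schema);
    if let Some(predicate) = predicate {
        source = source.with_predicate(predicate);
    }

    let config =
        FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), Arc::new(source))
            .with_file(file)
            .build();
    Ok(DataSourceExec::from_data_source(config))
}
```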
let filter = col("c2").eq(lit("abc")); @@ -464,13 +457,13 @@ mod tests { .await; let batches = rt.batches.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&batches),@r###" + insta::assert_snapshot!(batches_to_sort_string(&batches),@r" +----+----+----+ | c1 | c2 | c3 | +----+----+----+ | 1 | | 7 | +----+----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); let metric = get_value(&metrics, "pushdown_rows_pruned"); @@ -495,7 +488,7 @@ mod tests { let batch = RecordBatch::try_new(file_schema.clone(), vec![c3.clone(), c3]).unwrap(); - // Since c2 is missing from the file and we didn't supply a custom `SchemaAdapterFactory`, + // Since c2 is missing from the file and we didn't supply a custom `PhysicalExprAdapterFactory`, // the default behavior is to fill in missing columns with nulls. // Thus this predicate will come back as false. let filter = col("c2").eq(lit("abc")); @@ -526,13 +519,13 @@ mod tests { .await; let batches = rt.batches.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&batches),@r###" + insta::assert_snapshot!(batches_to_sort_string(&batches),@r" +----+----+----+ | c1 | c2 | c3 | +----+----+----+ | | | 7 | +----+----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); let metric = get_value(&metrics, "pushdown_rows_pruned"); @@ -575,13 +568,13 @@ mod tests { let batches = rt.batches.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&batches),@r###" + insta::assert_snapshot!(batches_to_sort_string(&batches),@r" +----+----+----+ | c1 | c2 | c3 | +----+----+----+ | 1 | | 10 | +----+----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); let metric = get_value(&metrics, "pushdown_rows_pruned"); @@ -605,7 +598,7 @@ mod tests { let batches = rt.batches.unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&batches),@r###" + insta::assert_snapshot!(batches_to_sort_string(&batches),@r" +----+----+----+ | c1 | c2 | c3 | +----+----+----+ @@ -613,7 +606,7 @@ mod tests { | 4 | | 40 | | 5 | | 50 | +----+----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); let metric = get_value(&metrics, "pushdown_rows_pruned"); @@ -642,7 +635,7 @@ mod tests { .await .unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&read), @r###" + insta::assert_snapshot!(batches_to_sort_string(&read), @r" +-----+----+----+ | c1 | c2 | c3 | +-----+----+----+ @@ -656,7 +649,7 @@ mod tests { | bar | | | | bar | | | +-----+----+----+ - "###); + "); } #[tokio::test] @@ -757,18 +750,18 @@ mod tests { .await .unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&read),@r###" - +-----+----+----+ - | c1 | c3 | c2 | - +-----+----+----+ - | | | | - | | 10 | 1 | - | | 20 | | - | | 20 | 2 | - | Foo | 10 | | - | bar | | | - +-----+----+----+ - "###); + insta::assert_snapshot!(batches_to_sort_string(&read),@r" + +-----+----+----+ + | c1 | c3 | c2 | + +-----+----+----+ + | | | | + | | 10 | 1 | + | | 20 | | + | | 20 | 2 | + | Foo | 10 | | + | bar | | | + +-----+----+----+ + "); } #[tokio::test] @@ -789,14 +782,14 @@ mod tests { .round_trip(vec![batch1, batch2]) .await; - insta::assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()), @r###" + insta::assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()), @r" +----+----+----+ | c1 | c3 | c2 | +----+----+----+ | | 10 | 1 | | | 20 | 2 | +----+----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); // Note there are were 6 rows in total (across three batches) assert_eq!(get_value(&metrics, "pushdown_rows_pruned"), 4); @@ -832,7 
+825,7 @@ mod tests { .await .unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&read), @r###" + insta::assert_snapshot!(batches_to_sort_string(&read), @r" +-----+-----+ | c1 | c4 | +-----+-----+ @@ -843,7 +836,7 @@ mod tests { | bar | | | bar | | +-----+-----+ - "###); + "); } #[tokio::test] @@ -1056,18 +1049,18 @@ mod tests { // In a real query where this predicate was pushed down from a filter stage instead of created directly in the `DataSourceExec`, // the filter stage would be preserved as a separate execution plan stage so the actual query results would be as expected. - insta::assert_snapshot!(batches_to_sort_string(&read),@r###" - +-----+----+ - | c1 | c2 | - +-----+----+ - | | | - | | | - | | 1 | - | | 2 | - | Foo | | - | bar | | - +-----+----+ - "###); + insta::assert_snapshot!(batches_to_sort_string(&read),@r" + +-----+----+ + | c1 | c2 | + +-----+----+ + | | | + | | | + | | 1 | + | | 2 | + | Foo | | + | bar | | + +-----+----+ + "); } #[tokio::test] @@ -1092,13 +1085,13 @@ mod tests { .round_trip(vec![batch1, batch2]) .await; - insta::assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()), @r###" + insta::assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()), @r" +----+----+ | c1 | c2 | +----+----+ | | 1 | +----+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); // Note there are were 6 rows in total (across three batches) assert_eq!(get_value(&metrics, "pushdown_rows_pruned"), 5); @@ -1152,7 +1145,7 @@ mod tests { .round_trip(vec![batch1, batch2, batch3, batch4]) .await; - insta::assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()), @r###" + insta::assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()), @r" +------+----+ | c1 | c2 | +------+----+ @@ -1169,7 +1162,7 @@ mod tests { | Foo2 | | | Foo3 | | +------+----+ - "###); + "); let metrics = rt.parquet_exec.metrics().unwrap(); // There are 4 rows pruned in each of batch2, batch3, and @@ -1201,14 +1194,14 @@ mod tests { .await .unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&read),@r###" - +-----+----+ - | c1 | c2 | - +-----+----+ - | Foo | 1 | - | bar | | - +-----+----+ - "###); + insta::assert_snapshot!(batches_to_sort_string(&read),@r" + +-----+----+ + | c1 | c2 | + +-----+----+ + | Foo | 1 | + | bar | | + +-----+----+ + "); } #[tokio::test] @@ -1231,15 +1224,15 @@ mod tests { .await .unwrap(); - insta::assert_snapshot!(batches_to_sort_string(&read),@r###" - +-----+----+ - | c1 | c2 | - +-----+----+ - | | 2 | - | Foo | 1 | - | bar | | - +-----+----+ - "###); + insta::assert_snapshot!(batches_to_sort_string(&read),@r" + +-----+----+ + | c1 | c2 | + +-----+----+ + | | 2 | + | Foo | 1 | + | bar | | + +-----+----+ + "); } #[tokio::test] @@ -1264,7 +1257,7 @@ mod tests { ("c3", c3.clone()), ]); - // batch2: c3(int8), c2(int64), c1(string), c4(string) + // batch2: c3(date64), c2(int64), c1(string) let batch2 = create_batch(vec![("c3", c4), ("c2", c2), ("c1", c1)]); let table_schema = Schema::new(vec![ @@ -1278,8 +1271,10 @@ mod tests { .with_table_schema(Arc::new(table_schema)) .round_trip_to_batches(vec![batch1, batch2]) .await; - assert_contains!(read.unwrap_err().to_string(), - "Cannot cast file schema field c3 of type Date64 to table schema field of type Int8"); + assert_contains!( + read.unwrap_err().to_string(), + "Cannot cast column 'c3' from 'Date64' (physical data type) to 'Int8' (logical data type)" + ); } #[tokio::test] @@ -1329,7 +1324,7 @@ mod tests { async fn parquet_exec_with_int96_from_spark() -> Result<()> { // arrow-rs relies on the chrono 
library to convert between timestamps and strings, so // instead compare as Int64. The underlying type should be a PrimitiveArray of Int64 - // anyway, so this should be a zero-copy non-modifying cast at the SchemaAdapter. + // anyway, so this should be a zero-copy non-modifying cast. let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, true)])); let testdata = datafusion_common::test_util::parquet_test_data(); @@ -1550,8 +1545,7 @@ mod tests { ) -> Result<()> { let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file_groups(file_groups) .build(); @@ -1653,23 +1647,27 @@ mod tests { ), ]); - let source = Arc::new(ParquetSource::default()); - let config = FileScanConfigBuilder::new(object_store_url, schema.clone(), source) - .with_file(partitioned_file) - // file has 10 cols so index 12 should be month and 13 should be day - .with_projection_indices(Some(vec![0, 1, 2, 12, 13])) - .with_table_partition_cols(vec![ - Field::new("year", DataType::Utf8, false), - Field::new("month", DataType::UInt8, false), - Field::new( + let table_schema = TableSchema::new( + Arc::clone(&schema), + vec![ + Arc::new(Field::new("year", DataType::Utf8, false)), + Arc::new(Field::new("month", DataType::UInt8, false)), + Arc::new(Field::new( "day", DataType::Dictionary( Box::new(DataType::UInt16), Box::new(DataType::Utf8), ), false, - ), - ]) + )), + ], + ); + let source = Arc::new(ParquetSource::new(table_schema.clone())); + let config = FileScanConfigBuilder::new(object_store_url, source) + .with_file(partitioned_file) + // file has 10 cols so index 12 should be month and 13 should be day + .with_projection_indices(Some(vec![0, 1, 2, 12, 13])) + .unwrap() .build(); let parquet_exec = DataSourceExec::from_data_source(config); @@ -1684,20 +1682,20 @@ mod tests { let batch = results.next().await.unwrap()?; assert_eq!(batch.schema().as_ref(), &expected_schema); - assert_snapshot!(batches_to_string(&[batch]),@r###" - +----+----------+-------------+-------+-----+ - | id | bool_col | tinyint_col | month | day | - +----+----------+-------------+-------+-----+ - | 4 | true | 0 | 10 | 26 | - | 5 | false | 1 | 10 | 26 | - | 6 | true | 0 | 10 | 26 | - | 7 | false | 1 | 10 | 26 | - | 2 | true | 0 | 10 | 26 | - | 3 | false | 1 | 10 | 26 | - | 0 | true | 0 | 10 | 26 | - | 1 | false | 1 | 10 | 26 | - +----+----------+-------------+-------+-----+ - "###); + assert_snapshot!(batches_to_string(&[batch]),@r" + +----+----------+-------------+-------+-----+ + | id | bool_col | tinyint_col | month | day | + +----+----------+-------------+-------+-----+ + | 4 | true | 0 | 10 | 26 | + | 5 | false | 1 | 10 | 26 | + | 6 | true | 0 | 10 | 26 | + | 7 | false | 1 | 10 | 26 | + | 2 | true | 0 | 10 | 26 | + | 3 | false | 1 | 10 | 26 | + | 0 | true | 0 | 10 | 26 | + | 1 | false | 1 | 10 | 26 | + +----+----------+-------------+-------+-----+ + "); let batch = results.next().await; assert!(batch.is_none()); @@ -1731,8 +1729,7 @@ mod tests { let file_schema = Arc::new(Schema::empty()); let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file(partitioned_file) .build(); @@ -1770,14 +1767,14 @@ mod tests { let metrics = rt.parquet_exec.metrics().unwrap(); - assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()),@r###" - +-----+ - | int | - +-----+ - | 4 | - | 5 | - 
+-----+ - "###); + assert_snapshot!(batches_to_sort_string(&rt.batches.unwrap()),@r" + +-----+ + | int | + +-----+ + | 4 | + | 5 | + +-----+ + "); let (page_index_pruned, page_index_matched) = get_pruning_metric(&metrics, "page_index_rows_pruned"); assert_eq!(page_index_pruned, 4); @@ -1823,14 +1820,14 @@ mod tests { let metrics = rt.parquet_exec.metrics().unwrap(); // assert the batches and some metrics - assert_snapshot!(batches_to_string(&rt.batches.unwrap()),@r###" - +-----+ - | c1 | - +-----+ - | Foo | - | zzz | - +-----+ - "###); + assert_snapshot!(batches_to_string(&rt.batches.unwrap()),@r" + +-----+ + | c1 | + +-----+ + | Foo | + | zzz | + +-----+ + "); // pushdown predicates have eliminated all 4 bar rows and the // null row for 5 rows total @@ -1879,6 +1876,100 @@ mod tests { assert_contains!(&explain, "projection=[c1]"); } + #[tokio::test] + async fn parquet_exec_metrics_with_multiple_predicates() { + // Test that metrics are correctly calculated when multiple predicates + // are pushed down (connected with AND). This ensures we don't double-count + // rows when multiple predicates filter the data sequentially. + + // Create a batch with two columns: c1 (string) and c2 (int32) + // Total: 10 rows + let c1: ArrayRef = Arc::new(StringArray::from(vec![ + Some("foo"), // 0 - passes c1 filter, fails c2 filter (5 <= 10) + Some("bar"), // 1 - fails c1 filter + Some("bar"), // 2 - fails c1 filter + Some("baz"), // 3 - passes both filters (20 > 10) + Some("foo"), // 4 - passes both filters (12 > 10) + Some("bar"), // 5 - fails c1 filter + Some("baz"), // 6 - passes both filters (25 > 10) + Some("foo"), // 7 - passes c1 filter, fails c2 filter (7 <= 10) + Some("bar"), // 8 - fails c1 filter + Some("qux"), // 9 - passes both filters (30 > 10) + ])); + + let c2: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(5), + Some(15), + Some(8), + Some(20), + Some(12), + Some(9), + Some(25), + Some(7), + Some(18), + Some(30), + ])); + + let batch = create_batch(vec![("c1", c1), ("c2", c2)]); + + // Create filter: c1 != 'bar' AND c2 > 10 + // + // First predicate (c1 != 'bar'): + // - Rows passing: 0, 3, 4, 6, 7, 9 (6 rows) + // - Rows pruned: 1, 2, 5, 8 (4 rows) + // + // Second predicate (c2 > 10) on remaining 6 rows: + // - Rows passing: 3, 4, 6, 9 (4 rows with c2 = 20, 12, 25, 30) + // - Rows pruned: 0, 7 (2 rows with c2 = 5, 7) + // + // Expected final metrics: + // - pushdown_rows_matched: 4 (final result) + // - pushdown_rows_pruned: 4 + 2 = 6 (cumulative) + // - Total: 4 + 6 = 10 + + let filter = col("c1").not_eq(lit("bar")).and(col("c2").gt(lit(10))); + + let rt = RoundTrip::new() + .with_predicate(filter) + .with_pushdown_predicate() + .round_trip(vec![batch]) + .await; + + let metrics = rt.parquet_exec.metrics().unwrap(); + + // Verify the result rows + assert_snapshot!(batches_to_string(&rt.batches.unwrap()),@r" + +-----+----+ + | c1 | c2 | + +-----+----+ + | baz | 20 | + | foo | 12 | + | baz | 25 | + | qux | 30 | + +-----+----+ + "); + + // Verify metrics - this is the key test + let pushdown_rows_matched = get_value(&metrics, "pushdown_rows_matched"); + let pushdown_rows_pruned = get_value(&metrics, "pushdown_rows_pruned"); + + assert_eq!( + pushdown_rows_matched, 4, + "Expected 4 rows to pass both predicates" + ); + assert_eq!( + pushdown_rows_pruned, 6, + "Expected 6 rows to be pruned (4 by first predicate + 2 by second predicate)" + ); + + // The sum should equal the total number of rows + assert_eq!( + pushdown_rows_matched + pushdown_rows_pruned, + 10, + "matched + pruned should 
equal total rows" + ); + } + #[tokio::test] async fn parquet_exec_has_no_pruning_predicate_if_can_not_prune() { // batch1: c1(string) @@ -2119,13 +2210,13 @@ mod tests { let sql = "select * from base_table where name='test02'"; let batch = ctx.sql(sql).await.unwrap().collect().await.unwrap(); assert_eq!(batch.len(), 1); - insta::assert_snapshot!(batches_to_string(&batch),@r###" - +---------------------+----+--------+ - | struct | id | name | - +---------------------+----+--------+ - | {id: 4, name: aaa2} | 2 | test02 | - +---------------------+----+--------+ - "###); + insta::assert_snapshot!(batches_to_string(&batch),@r" + +---------------------+----+--------+ + | struct | id | name | + +---------------------+----+--------+ + | {id: 4, name: aaa2} | 2 | test02 | + +---------------------+----+--------+ + "); Ok(()) } @@ -2148,13 +2239,13 @@ mod tests { let sql = "select * from base_table where name='test02'"; let batch = ctx.sql(sql).await.unwrap().collect().await.unwrap(); assert_eq!(batch.len(), 1); - insta::assert_snapshot!(batches_to_string(&batch),@r###" - +---------------------+----+--------+ - | struct | id | name | - +---------------------+----+--------+ - | {id: 4, name: aaa2} | 2 | test02 | - +---------------------+----+--------+ - "###); + insta::assert_snapshot!(batches_to_string(&batch),@r" + +---------------------+----+--------+ + | struct | id | name | + +---------------------+----+--------+ + | {id: 4, name: aaa2} | 2 | test02 | + +---------------------+----+--------+ + "); Ok(()) } @@ -2279,11 +2370,11 @@ mod tests { let size_hint_calls = reader_factory.metadata_size_hint_calls.clone(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(Arc::clone(&schema)) .with_parquet_file_reader_factory(reader_factory) .with_metadata_size_hint(456), ); - let config = FileScanConfigBuilder::new(store_url, schema, source) + let config = FileScanConfigBuilder::new(store_url, source) .with_file( PartitionedFile { object_meta: ObjectMeta { diff --git a/datafusion/core/src/datasource/view_test.rs b/datafusion/core/src/datasource/view_test.rs index 85ad9ff664ade..35418d6dea632 100644 --- a/datafusion/core/src/datasource/view_test.rs +++ b/datafusion/core/src/datasource/view_test.rs @@ -46,13 +46,13 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---+ | b | +---+ | 2 | +---+ - "###); + "); Ok(()) } @@ -96,14 +96,14 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------+---------+---------+ | column1 | column2 | column3 | +---------+---------+---------+ | 1 | 2 | 3 | | 4 | 5 | 6 | +---------+---------+---------+ - "###); + "); let view_sql = "CREATE VIEW replace_xyz AS SELECT * REPLACE (column1*2 as column1) FROM xyz"; @@ -115,14 +115,14 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------+---------+---------+ | column1 | column2 | column3 | +---------+---------+---------+ | 2 | 2 | 3 | | 8 | 5 | 6 | +---------+---------+---------+ - "###); + "); Ok(()) } @@ -146,14 +146,14 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------------+ | column1_alias | +---------------+ | 1 | | 4 | +---------------+ - "###); + "); Ok(()) } @@ -177,14 +177,14 @@ 
mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------------+---------------+ | column2_alias | column1_alias | +---------------+---------------+ | 2 | 1 | | 5 | 4 | +---------------+---------------+ - "###); + "); Ok(()) } @@ -213,14 +213,14 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------+ | column1 | +---------+ | 1 | | 4 | +---------+ - "###); + "); Ok(()) } @@ -249,13 +249,13 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------+ | column1 | +---------+ | 4 | +---------+ - "###); + "); Ok(()) } @@ -287,14 +287,14 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------+---------+---------+ | column2 | column1 | column3 | +---------+---------+---------+ | 2 | 1 | 3 | | 5 | 4 | 6 | +---------+---------+---------+ - "###); + "); Ok(()) } @@ -358,7 +358,10 @@ mod tests { .to_string(); assert!(formatted.contains("DataSourceExec: ")); assert!(formatted.contains("file_type=parquet")); - assert!(formatted.contains("projection=[bool_col, int_col], limit=10")); + assert!( + formatted.contains("projection=[bool_col, int_col], limit=10"), + "{formatted}" + ); Ok(()) } @@ -442,14 +445,14 @@ mod tests { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&results),@r###" + insta::assert_snapshot!(batches_to_string(&results),@r" +---------+ | column1 | +---------+ | 1 | | 4 | +---------+ - "###); + "); Ok(()) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 687779787ab50..a769bb01b4354 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -20,6 +20,7 @@ use std::collections::HashSet; use std::fmt::Debug; use std::sync::{Arc, Weak}; +use std::time::Duration; use super::options::ReadOptions; use crate::datasource::dynamic_file::DynamicListTableFactory; @@ -33,20 +34,20 @@ use crate::{ datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }, - datasource::{provider_as_source, MemTable, ViewTable}, + datasource::{MemTable, ViewTable, provider_as_source}, error::Result, execution::{ + FunctionRegistry, options::ArrowReadOptions, runtime_env::{RuntimeEnv, RuntimeEnvBuilder}, - FunctionRegistry, }, logical_expr::AggregateUDF, logical_expr::ScalarUDF, logical_expr::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, CreateMemoryTable, CreateView, DropCatalogSchema, DropFunction, DropTable, - DropView, Execute, LogicalPlan, LogicalPlanBuilder, Prepare, SetVariable, - TableType, UNNAMED_TABLE, + DropView, Execute, LogicalPlan, LogicalPlanBuilder, Prepare, ResetVariable, + SetVariable, TableType, UNNAMED_TABLE, }, physical_expr::PhysicalExpr, physical_plan::ExecutionPlan, @@ -58,32 +59,43 @@ pub use crate::execution::session_state::SessionState; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_catalog::memory::MemorySchemaProvider; use datafusion_catalog::MemoryCatalogProvider; +use datafusion_catalog::memory::MemorySchemaProvider; use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; -use 
datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigField, ConfigOptions}; use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::{ + DFSchema, DataFusionError, ParamValues, SchemaReference, TableReference, config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, tree_node::{TreeNodeRecursion, TreeNodeVisitor}, - DFSchema, DataFusionError, ParamValues, SchemaReference, TableReference, +}; +pub use datafusion_execution::TaskContext; +use datafusion_execution::cache::cache_manager::{ + DEFAULT_LIST_FILES_CACHE_MEMORY_LIMIT, DEFAULT_LIST_FILES_CACHE_TTL, + DEFAULT_METADATA_CACHE_LIMIT, }; pub use datafusion_execution::config::SessionConfig; +use datafusion_execution::disk_manager::{ + DEFAULT_MAX_TEMP_DIRECTORY_SIZE, DiskManagerBuilder, +}; use datafusion_execution::registry::SerializerRegistry; -pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; +#[cfg(feature = "sql")] +use datafusion_expr::planner::RelationPlanner; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{ + Expr, UserDefinedLogicalNode, WindowUDF, expr_rewriter::FunctionRewrite, logical_plan::{DdlStatement, Statement}, planner::ExprPlanner, - Expr, UserDefinedLogicalNode, WindowUDF, }; -use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; use datafusion_optimizer::Analyzer; +use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; +use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; use datafusion_session::SessionStore; @@ -476,6 +488,11 @@ impl SessionContext { self.state.write().append_optimizer_rule(optimizer_rule); } + /// Removes an optimizer rule by name, returning `true` if it existed. + pub fn remove_optimizer_rule(&self, name: &str) -> bool { + self.state.write().remove_optimizer_rule(name) + } + /// Adds an analyzer rule to the end of the existing rules. /// /// See [`SessionState`] for more control of when the rule is applied. 
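A minimal usage sketch of the new `SessionContext::remove_optimizer_rule` API added in the hunk above, modeled on the test added later in this patch; the `tokio` setup and `datafusion::prelude` import paths are assumptions for a standalone example, not part of the patch:

    use datafusion::error::Result;
    use datafusion::prelude::SessionContext;

    #[tokio::main]
    async fn main() -> Result<()> {
        let ctx = SessionContext::new();

        // Returns true only if a rule with that name was registered.
        assert!(ctx.remove_optimizer_rule("simplify_expressions"));
        assert!(!ctx.remove_optimizer_rule("simplify_expressions"));

        // With the rule gone, `1 + 1` is no longer folded into a constant
        // during logical optimization.
        let plan = ctx
            .sql("SELECT 1 + 1")
            .await?
            .into_optimized_plan()?
            .to_string();
        println!("{plan}");
        Ok(())
    }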
@@ -678,7 +695,7 @@ impl SessionContext { match ddl { DdlStatement::CreateExternalTable(cmd) => { (Box::pin(async move { self.create_external_table(&cmd).await }) - as std::pin::Pin + Send>>) + as std::pin::Pin + Send>>) .await } DdlStatement::CreateMemoryTable(cmd) => { @@ -709,7 +726,12 @@ impl SessionContext { } // TODO what about the other statements (like TransactionStart and TransactionEnd) LogicalPlan::Statement(Statement::SetVariable(stmt)) => { - self.set_variable(stmt).await + self.set_variable(stmt).await?; + self.return_empty_dataframe() + } + LogicalPlan::Statement(Statement::ResetVariable(stmt)) => { + self.reset_variable(stmt).await?; + self.return_empty_dataframe() } LogicalPlan::Statement(Statement::Prepare(Prepare { name, @@ -774,7 +796,7 @@ impl SessionContext { /// * [`SessionState::create_physical_expr`] for a lower level API /// /// [simplified]: datafusion_optimizer::simplify_expressions - /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs + /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs pub fn create_physical_expr( &self, expr: Expr, @@ -1052,22 +1074,22 @@ impl SessionContext { } else if allow_missing { return self.return_empty_dataframe(); } else { - return self.schema_doesnt_exist_err(name); + return self.schema_doesnt_exist_err(&name); } }; let dereg = catalog.deregister_schema(name.schema_name(), cascade)?; match (dereg, allow_missing) { (None, true) => self.return_empty_dataframe(), - (None, false) => self.schema_doesnt_exist_err(name), + (None, false) => self.schema_doesnt_exist_err(&name), (Some(_), _) => self.return_empty_dataframe(), } } - fn schema_doesnt_exist_err(&self, schemaref: SchemaReference) -> Result { + fn schema_doesnt_exist_err(&self, schemaref: &SchemaReference) -> Result { exec_err!("Schema '{schemaref}' doesn't exist.") } - async fn set_variable(&self, stmt: SetVariable) -> Result { + async fn set_variable(&self, stmt: SetVariable) -> Result<()> { let SetVariable { variable, value, .. 
} = stmt; @@ -1097,11 +1119,37 @@ impl SessionContext { for udf in udfs_to_update { state.register_udf(udf)?; } + } - drop(state); + Ok(()) + } + + async fn reset_variable(&self, stmt: ResetVariable) -> Result<()> { + let variable = stmt.variable; + if variable.starts_with("datafusion.runtime.") { + return self.reset_runtime_variable(&variable); } - self.return_empty_dataframe() + let mut state = self.state.write(); + state.config_mut().options_mut().reset(&variable)?; + + // Refresh UDFs to ensure configuration-dependent behavior updates + let config_options = state.config().options(); + let udfs_to_update: Vec<_> = state + .scalar_functions() + .values() + .filter_map(|udf| { + udf.inner() + .with_updated_config(config_options) + .map(Arc::new) + }) + .collect(); + + for udf in udfs_to_update { + state.register_udf(udf)?; + } + + Ok(()) } fn set_runtime_variable(&self, variable: &str, value: &str) -> Result<()> { @@ -1124,6 +1172,53 @@ impl SessionContext { let limit = Self::parse_memory_limit(value)?; builder.with_metadata_cache_limit(limit) } + "list_files_cache_limit" => { + let limit = Self::parse_memory_limit(value)?; + builder.with_object_list_cache_limit(limit) + } + "list_files_cache_ttl" => { + let duration = Self::parse_duration(value)?; + builder.with_object_list_cache_ttl(Some(duration)) + } + _ => return plan_err!("Unknown runtime configuration: {variable}"), + // Remember to update `reset_runtime_variable()` when adding new options + }; + + *state = SessionStateBuilder::from(state.clone()) + .with_runtime_env(Arc::new(builder.build()?)) + .build(); + + Ok(()) + } + + fn reset_runtime_variable(&self, variable: &str) -> Result<()> { + let key = variable.strip_prefix("datafusion.runtime.").unwrap(); + + let mut state = self.state.write(); + + let mut builder = RuntimeEnvBuilder::from_runtime_env(state.runtime_env()); + match key { + "memory_limit" => { + builder.memory_pool = None; + } + "max_temp_directory_size" => { + builder = + builder.with_max_temp_directory_size(DEFAULT_MAX_TEMP_DIRECTORY_SIZE); + } + "temp_directory" => { + builder.disk_manager_builder = Some(DiskManagerBuilder::default()); + } + "metadata_cache_limit" => { + builder = builder.with_metadata_cache_limit(DEFAULT_METADATA_CACHE_LIMIT); + } + "list_files_cache_limit" => { + builder = builder + .with_object_list_cache_limit(DEFAULT_LIST_FILES_CACHE_MEMORY_LIMIT); + } + "list_files_cache_ttl" => { + builder = + builder.with_object_list_cache_ttl(DEFAULT_LIST_FILES_CACHE_TTL); + } _ => return plan_err!("Unknown runtime configuration: {variable}"), }; @@ -1164,6 +1259,36 @@ impl SessionContext { } } + fn parse_duration(duration: &str) -> Result { + let mut minutes = None; + let mut seconds = None; + + for duration in duration.split_inclusive(&['m', 's']) { + let (number, unit) = duration.split_at(duration.len() - 1); + let number: u64 = number.parse().map_err(|_| { + plan_datafusion_err!("Failed to parse number from duration '{duration}'") + })?; + + match unit { + "m" if minutes.is_none() && seconds.is_none() => minutes = Some(number), + "s" if seconds.is_none() => seconds = Some(number), + _ => plan_err!( + "Invalid duration, unit must be either 'm' (minutes), or 's' (seconds), and be in the correct order" + )?, + } + } + + let duration = Duration::from_secs( + minutes.unwrap_or_default() * 60 + seconds.unwrap_or_default(), + ); + + if duration.is_zero() { + return plan_err!("Duration must be greater than 0 seconds"); + } + + Ok(duration) + } + async fn create_custom_table( &self, cmd: &CreateExternalTable, 
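The `reset_variable`, `reset_runtime_variable`, and `parse_duration` additions above complement the existing `SET` path for `datafusion.runtime.*` options; a hedged sketch of how this might look from SQL, assuming the planner maps `RESET <option>` to the new `ResetVariable` statement wired up in this hunk. Durations accept minutes and/or seconds in that order, e.g. '90s', '2m', '1m30s':

    use datafusion::error::Result;
    use datafusion::prelude::SessionContext;

    #[tokio::main]
    async fn main() -> Result<()> {
        let ctx = SessionContext::new();

        // Set a runtime option; the value goes through `parse_duration`.
        ctx.sql("SET datafusion.runtime.list_files_cache_ttl = '1m30s'")
            .await?
            .collect()
            .await?;

        // Restore the built-in default for the same option
        // (assumes `RESET` parses to the new ResetVariable statement).
        ctx.sql("RESET datafusion.runtime.list_files_cache_ttl")
            .await?
            .collect()
            .await?;

        Ok(())
    }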
@@ -1197,13 +1322,12 @@ impl SessionContext { .and_then(|c| c.schema(&resolved.schema)) }; - if let Some(schema) = maybe_schema { - if let Some(table_provider) = schema.table(&table).await? { - if table_provider.table_type() == table_type { - schema.deregister_table(&table)?; - return Ok(true); - } - } + if let Some(schema) = maybe_schema + && let Some(table_provider) = schema.table(&table).await? + && table_provider.table_type() == table_type + { + schema.deregister_table(&table)?; + return Ok(true); } Ok(false) @@ -1219,7 +1343,7 @@ impl SessionContext { _ => { return Err(DataFusionError::Configuration( "Function factory has not been configured".to_string(), - )) + )); } } }; @@ -1269,14 +1393,18 @@ impl SessionContext { exec_datafusion_err!("Prepared statement '{}' does not exist", name) })?; + let state = self.state.read(); + let context = SimplifyContext::new(state.execution_props()); + let simplifier = ExprSimplifier::new(context); + // Only allow literals as parameters for now. let mut params: Vec = parameters .into_iter() - .map(|e| match e { + .map(|e| match simplifier.simplify(e)? { Expr::Literal(scalar, metadata) => { Ok(ScalarAndMetadata::new(scalar, metadata)) } - _ => not_impl_err!("Unsupported parameter type: {}", e), + e => not_impl_err!("Unsupported parameter type: {e}"), }) .collect::>()?; @@ -1359,6 +1487,18 @@ impl SessionContext { self.state.write().register_udwf(Arc::new(f)).ok(); } + #[cfg(feature = "sql")] + /// Registers a [`RelationPlanner`] to customize SQL table-factor planning. + /// + /// Planners are invoked in reverse registration order, allowing newer + /// planners to take precedence over existing ones. + pub fn register_relation_planner( + &self, + planner: Arc, + ) -> Result<()> { + self.state.write().register_relation_planner(planner) + } + /// Deregisters a UDF within this context. pub fn deregister_udf(&self, name: &str) { self.state.write().deregister_udf(name).ok(); @@ -1788,6 +1928,12 @@ impl FunctionRegistry for SessionContext { } } +impl datafusion_execution::TaskContextProvider for SessionContext { + fn task_ctx(&self) -> Arc { + SessionContext::task_ctx(self) + } +} + /// Create a new task context instance from SessionContext impl From<&SessionContext> for TaskContext { fn from(session: &SessionContext) -> Self { @@ -1831,7 +1977,7 @@ pub trait QueryPlanner: Debug { /// because the implementation and requirements vary widely. Please see /// [function_factory example] for a reference implementation. /// -/// [function_factory example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/function_factory.rs +/// [function_factory example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/function_factory.rs /// /// # Examples of syntax that can be supported /// @@ -2531,4 +2677,69 @@ mod tests { } } } + + #[tokio::test] + async fn remove_optimizer_rule() -> Result<()> { + let get_optimizer_rules = |ctx: &SessionContext| { + ctx.state() + .optimizer() + .rules + .iter() + .map(|r| r.name().to_owned()) + .collect::>() + }; + + let ctx = SessionContext::new(); + assert!(get_optimizer_rules(&ctx).contains("simplify_expressions")); + + // default plan + let plan = ctx + .sql("select 1 + 1") + .await? + .into_optimized_plan()? 
+ .to_string(); + assert_snapshot!(plan, @r" + Projection: Int64(2) AS Int64(1) + Int64(1) + EmptyRelation: rows=1 + "); + + assert!(ctx.remove_optimizer_rule("simplify_expressions")); + assert!(!get_optimizer_rules(&ctx).contains("simplify_expressions")); + + // plan without the simplify_expressions rule + let plan = ctx + .sql("select 1 + 1") + .await? + .into_optimized_plan()? + .to_string(); + assert_snapshot!(plan, @r" + Projection: Int64(1) + Int64(1) + EmptyRelation: rows=1 + "); + + // attempting to remove a non-existing rule returns false + assert!(!ctx.remove_optimizer_rule("simplify_expressions")); + + Ok(()) + } + + #[test] + fn test_parse_duration() { + // Valid durations + for (duration, want) in [ + ("1s", Duration::from_secs(1)), + ("1m", Duration::from_secs(60)), + ("1m0s", Duration::from_secs(60)), + ("1m1s", Duration::from_secs(61)), + ] { + let have = SessionContext::parse_duration(duration).unwrap(); + assert_eq!(want, have); + } + + // Invalid durations + for duration in ["0s", "0m", "1s0m", "1s1m"] { + let have = SessionContext::parse_duration(duration); + assert!(have.is_err()); + } + } } diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index 731f7e59ecfaf..823dc946ea732 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -113,7 +113,7 @@ mod tests { }; use datafusion_execution::config::SessionConfig; - use tempfile::{tempdir, TempDir}; + use tempfile::{TempDir, tempdir}; #[tokio::test] async fn read_with_glob_path() -> Result<()> { @@ -355,7 +355,9 @@ mod tests { let expected_path = binding[0].as_str(); assert_eq!( read_df.unwrap_err().strip_backtrace(), - format!("Execution error: File path '{expected_path}' does not match the expected extension '.parquet'") + format!( + "Execution error: File path '{expected_path}' does not match the expected extension '.parquet'" + ) ); // Read the dataframe from 'output3.parquet.snappy.parquet' with the correct file extension. 
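Several hunks in this patch move the parquet tests from `ParquetSource::default()` plus a separate schema argument to `ParquetSource::new(schema)`, with `FileScanConfigBuilder::new` dropping its schema parameter and `with_projection_indices` now returning a `Result`. A minimal sketch of the post-patch construction pattern; the exact import paths and the single-file helper are assumptions for illustration, not taken from the patch:

    use std::sync::Arc;

    use arrow::datatypes::SchemaRef;
    use datafusion_common::Result;
    use datafusion_datasource::PartitionedFile;
    use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
    use datafusion_datasource::source::DataSourceExec;
    use datafusion_datasource_parquet::source::ParquetSource;
    use datafusion_execution::object_store::ObjectStoreUrl;

    /// Build a parquet `DataSourceExec` that scans a single file.
    fn parquet_exec_for_file(
        table_schema: SchemaRef,
        file: PartitionedFile,
    ) -> Result<Arc<DataSourceExec>> {
        // The source now owns the table schema ...
        let source = Arc::new(ParquetSource::new(table_schema));
        // ... so the builder takes only the object store URL and the source;
        // projection indices are validated, hence the `?`.
        let config = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source)
            .with_file(file)
            .with_projection_indices(Some(vec![0]))?
            .build();
        Ok(DataSourceExec::from_data_source(config))
    }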
diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index c15b7eae08432..6a9ebcdf51250 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -27,14 +27,14 @@ use crate::catalog::{CatalogProviderList, SchemaProvider, TableProviderFactory}; use crate::datasource::file_format::FileFormatFactory; #[cfg(feature = "sql")] use crate::datasource::provider_as_source; -use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; +use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; use arrow_schema::{DataType, FieldRef}; +use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::information_schema::{ - InformationSchemaProvider, INFORMATION_SCHEMA, + INFORMATION_SCHEMA, InformationSchemaProvider, }; -use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::{TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; #[cfg(feature = "sql")] @@ -43,21 +43,21 @@ use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{ - config_err, exec_err, plan_datafusion_err, DFSchema, DataFusionError, - ResolvedTableReference, TableReference, + DFSchema, DataFusionError, ResolvedTableReference, TableReference, config_err, + exec_err, plan_datafusion_err, }; +use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_execution::TaskContext; +#[cfg(feature = "sql")] +use datafusion_expr::TableSource; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::planner::ExprPlanner; #[cfg(feature = "sql")] -use datafusion_expr::planner::TypePlanner; +use datafusion_expr::planner::{RelationPlanner, TypePlanner}; use datafusion_expr::registry::{FunctionRegistry, SerializerRegistry}; use datafusion_expr::simplify::SimplifyInfo; -#[cfg(feature = "sql")] -use datafusion_expr::TableSource; use datafusion_expr::{ AggregateUDF, Explain, Expr, ExprSchemable, LogicalPlan, ScalarUDF, WindowUDF, }; @@ -67,8 +67,8 @@ use datafusion_optimizer::{ }; use datafusion_physical_expr::create_physical_expr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_optimizer::optimizer::PhysicalOptimizer; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::optimizer::PhysicalOptimizer; use datafusion_physical_plan::ExecutionPlan; use datafusion_session::Session; #[cfg(feature = "sql")] @@ -139,6 +139,8 @@ pub struct SessionState { analyzer: Analyzer, /// Provides support for customizing the SQL planner, e.g. to add support for custom operators like `->>` or `?` expr_planners: Vec>, + #[cfg(feature = "sql")] + relation_planners: Vec>, /// Provides support for customizing the SQL type planning #[cfg(feature = "sql")] type_planner: Option>, @@ -185,6 +187,7 @@ pub struct SessionState { /// It will be invoked on `CREATE FUNCTION` statements. 
/// thus, changing dialect o PostgreSql is required function_factory: Option>, + cache_factory: Option>, /// Cache logical plans of prepared statements for later execution. /// Key is the prepared statement name. prepared_plans: HashMap>, @@ -206,8 +209,12 @@ impl Debug for SessionState { .field("table_options", &self.table_options) .field("table_factories", &self.table_factories) .field("function_factory", &self.function_factory) + .field("cache_factory", &self.cache_factory) .field("expr_planners", &self.expr_planners); + #[cfg(feature = "sql")] + let ret = ret.field("relation_planners", &self.relation_planners); + #[cfg(feature = "sql")] let ret = ret.field("type_planner", &self.type_planner); @@ -345,6 +352,13 @@ impl SessionState { self.optimizer.rules.push(optimizer_rule); } + /// Removes an optimizer rule by name, returning `true` if it existed. + pub(crate) fn remove_optimizer_rule(&mut self, name: &str) -> bool { + let original_len = self.optimizer.rules.len(); + self.optimizer.rules.retain(|r| r.name() != name); + self.optimizer.rules.len() < original_len + } + /// Registers a [`FunctionFactory`] to handle `CREATE FUNCTION` statements pub fn set_function_factory(&mut self, function_factory: Arc) { self.function_factory = Some(function_factory); @@ -355,6 +369,16 @@ impl SessionState { self.function_factory.as_ref() } + /// Register a [`CacheFactory`] for custom caching strategy + pub fn set_cache_factory(&mut self, cache_factory: Arc) { + self.cache_factory = Some(cache_factory); + } + + /// Get the cache factory + pub fn cache_factory(&self) -> Option<&Arc> { + self.cache_factory.as_ref() + } + /// Get the table factories pub fn table_factories(&self) -> &HashMap> { &self.table_factories @@ -480,10 +504,10 @@ impl SessionState { let resolved = self.resolve_table_ref(reference); if let Entry::Vacant(v) = provider.tables.entry(resolved) { let resolved = v.key(); - if let Ok(schema) = self.schema_for_ref(resolved.clone()) { - if let Some(table) = schema.table(&resolved.table).await? { - v.insert(provider_as_source(table)); - } + if let Ok(schema) = self.schema_for_ref(resolved.clone()) + && let Some(table) = schema.table(&resolved.table).await? + { + v.insert(provider_as_source(table)); } } } @@ -547,6 +571,16 @@ impl SessionState { let sql_expr = self.sql_to_expr_with_alias(sql, &dialect)?; + self.create_logical_expr_from_sql_expr(sql_expr, df_schema) + } + + /// Creates a datafusion style AST [`Expr`] from a SQL expression. + #[cfg(feature = "sql")] + pub fn create_logical_expr_from_sql_expr( + &self, + sql_expr: SQLExprWithAlias, + df_schema: &DFSchema, + ) -> datafusion_common::Result { let provider = SessionContextProvider { state: self, tables: HashMap::new(), @@ -571,6 +605,24 @@ impl SessionState { &self.expr_planners } + #[cfg(feature = "sql")] + /// Returns the registered relation planners in priority order. + pub fn relation_planners(&self) -> &[Arc] { + &self.relation_planners + } + + #[cfg(feature = "sql")] + /// Registers a [`RelationPlanner`] to customize SQL relation planning. + /// + /// Newly registered planners are given higher priority than existing ones. 
+ pub fn register_relation_planner( + &mut self, + planner: Arc, + ) -> datafusion_common::Result<()> { + self.relation_planners.insert(0, planner); + Ok(()) + } + /// Returns the [`QueryPlanner`] for this session pub fn query_planner(&self) -> &Arc { &self.query_planner @@ -685,7 +737,7 @@ impl SessionState { /// * [`create_physical_expr`] for a lower-level API /// /// [simplified]: datafusion_optimizer::simplify_expressions - /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs + /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs /// [`SessionContext::create_physical_expr`]: crate::execution::context::SessionContext::create_physical_expr pub fn create_physical_expr( &self, @@ -788,10 +840,18 @@ impl SessionState { overwrite: bool, ) -> Result<(), DataFusionError> { let ext = file_format.get_ext().to_lowercase(); - match (self.file_formats.entry(ext.clone()), overwrite){ - (Entry::Vacant(e), _) => {e.insert(file_format);}, - (Entry::Occupied(mut e), true) => {e.insert(file_format);}, - (Entry::Occupied(_), false) => return config_err!("File type already registered for extension {ext}. Set overwrite to true to replace this extension."), + match (self.file_formats.entry(ext.clone()), overwrite) { + (Entry::Vacant(e), _) => { + e.insert(file_format); + } + (Entry::Occupied(mut e), true) => { + e.insert(file_format); + } + (Entry::Occupied(_), false) => { + return config_err!( + "File type already registered for extension {ext}. Set overwrite to true to replace this extension." + ); + } }; Ok(()) } @@ -914,6 +974,8 @@ pub struct SessionStateBuilder { analyzer: Option, expr_planners: Option>>, #[cfg(feature = "sql")] + relation_planners: Option>>, + #[cfg(feature = "sql")] type_planner: Option>, optimizer: Option, physical_optimizers: Option, @@ -931,6 +993,7 @@ pub struct SessionStateBuilder { table_factories: Option>>, runtime_env: Option>, function_factory: Option>, + cache_factory: Option>, // fields to support convenience functions analyzer_rules: Option>>, optimizer_rules: Option>>, @@ -951,6 +1014,8 @@ impl SessionStateBuilder { analyzer: None, expr_planners: None, #[cfg(feature = "sql")] + relation_planners: None, + #[cfg(feature = "sql")] type_planner: None, optimizer: None, physical_optimizers: None, @@ -968,6 +1033,7 @@ impl SessionStateBuilder { table_factories: None, runtime_env: None, function_factory: None, + cache_factory: None, // fields to support convenience functions analyzer_rules: None, optimizer_rules: None, @@ -1001,6 +1067,8 @@ impl SessionStateBuilder { analyzer: Some(existing.analyzer), expr_planners: Some(existing.expr_planners), #[cfg(feature = "sql")] + relation_planners: Some(existing.relation_planners), + #[cfg(feature = "sql")] type_planner: existing.type_planner, optimizer: Some(existing.optimizer), physical_optimizers: Some(existing.physical_optimizers), @@ -1020,7 +1088,7 @@ impl SessionStateBuilder { table_factories: Some(existing.table_factories), runtime_env: Some(existing.runtime_env), function_factory: existing.function_factory, - + cache_factory: existing.cache_factory, // fields to support convenience functions analyzer_rules: None, optimizer_rules: None, @@ -1141,6 +1209,16 @@ impl SessionStateBuilder { self } + #[cfg(feature = "sql")] + /// Sets the [`RelationPlanner`]s used to customize SQL relation planning. 
+ pub fn with_relation_planners( + mut self, + relation_planners: Vec>, + ) -> Self { + self.relation_planners = Some(relation_planners); + self + } + /// Set the [`TypePlanner`] used to customize the behavior of the SQL planner. #[cfg(feature = "sql")] pub fn with_type_planner(mut self, type_planner: Arc) -> Self { @@ -1309,6 +1387,15 @@ impl SessionStateBuilder { self } + /// Set a [`CacheFactory`] for custom caching strategy + pub fn with_cache_factory( + mut self, + cache_factory: Option>, + ) -> Self { + self.cache_factory = cache_factory; + self + } + /// Register an `ObjectStore` to the [`RuntimeEnv`]. See [`RuntimeEnv::register_object_store`] /// for more details. /// @@ -1355,6 +1442,8 @@ impl SessionStateBuilder { analyzer, expr_planners, #[cfg(feature = "sql")] + relation_planners, + #[cfg(feature = "sql")] type_planner, optimizer, physical_optimizers, @@ -1372,6 +1461,7 @@ impl SessionStateBuilder { table_factories, runtime_env, function_factory, + cache_factory, analyzer_rules, optimizer_rules, physical_optimizer_rules, @@ -1385,6 +1475,8 @@ impl SessionStateBuilder { analyzer: analyzer.unwrap_or_default(), expr_planners: expr_planners.unwrap_or_default(), #[cfg(feature = "sql")] + relation_planners: relation_planners.unwrap_or_default(), + #[cfg(feature = "sql")] type_planner, optimizer: optimizer.unwrap_or_default(), physical_optimizers: physical_optimizers.unwrap_or_default(), @@ -1408,6 +1500,7 @@ impl SessionStateBuilder { table_factories: table_factories.unwrap_or_default(), runtime_env, function_factory, + cache_factory, prepared_plans: HashMap::new(), }; @@ -1521,6 +1614,12 @@ impl SessionStateBuilder { &mut self.expr_planners } + #[cfg(feature = "sql")] + /// Returns a mutable reference to the current [`RelationPlanner`] list. 
+ pub fn relation_planners(&mut self) -> &mut Option>> { + &mut self.relation_planners + } + /// Returns the current type_planner value #[cfg(feature = "sql")] pub fn type_planner(&mut self) -> &mut Option> { @@ -1611,6 +1710,11 @@ impl SessionStateBuilder { &mut self.function_factory } + /// Returns the cache factory + pub fn cache_factory(&mut self) -> &mut Option> { + &mut self.cache_factory + } + /// Returns the current analyzer_rules value pub fn analyzer_rules( &mut self, @@ -1649,6 +1753,7 @@ impl Debug for SessionStateBuilder { .field("table_options", &self.table_options) .field("table_factories", &self.table_factories) .field("function_factory", &self.function_factory) + .field("cache_factory", &self.cache_factory) .field("expr_planners", &self.expr_planners); #[cfg(feature = "sql")] let ret = ret.field("type_planner", &self.type_planner); @@ -1695,6 +1800,10 @@ impl ContextProvider for SessionContextProvider<'_> { self.state.expr_planners() } + fn get_relation_planners(&self) -> &[Arc] { + self.state.relation_planners() + } + fn get_type_planner(&self) -> Option> { if let Some(type_planner) = &self.state.type_planner { Some(Arc::clone(type_planner)) @@ -1764,7 +1873,7 @@ impl ContextProvider for SessionContextProvider<'_> { } fn get_variable_type(&self, variable_names: &[String]) -> Option { - use datafusion_expr::var_provider::{is_system_variables, VarType}; + use datafusion_expr::var_provider::{VarType, is_system_variables}; if variable_names.is_empty() { return None; @@ -1947,6 +2056,12 @@ impl FunctionRegistry for SessionState { } } +impl datafusion_execution::TaskContextProvider for SessionState { + fn task_ctx(&self) -> Arc { + SessionState::task_ctx(self) + } +} + impl OptimizerConfig for SessionState { fn query_execution_start_time(&self) -> DateTime { self.execution_props.query_execution_start_time @@ -2037,14 +2152,27 @@ pub(crate) struct PreparedPlan { pub(crate) plan: Arc, } +/// A [`CacheFactory`] can be registered via [`SessionState`] +/// to create a custom logical plan for [`crate::dataframe::DataFrame::cache`]. +/// Additionally, a custom [`crate::physical_planner::ExtensionPlanner`]/[`QueryPlanner`] +/// may need to be implemented to handle such plans. 
+pub trait CacheFactory: Debug + Send + Sync { + /// Create a logical plan for caching + fn create( + &self, + plan: LogicalPlan, + session_state: &SessionState, + ) -> datafusion_common::Result; +} + #[cfg(test)] mod tests { use super::{SessionContextProvider, SessionStateBuilder}; use crate::common::assert_contains; use crate::config::ConfigOptions; + use crate::datasource::MemTable; use crate::datasource::empty::EmptyTable; use crate::datasource::provider_as_source; - use crate::datasource::MemTable; use crate::execution::context::SessionState; use crate::logical_expr::planner::ExprPlanner; use crate::logical_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; @@ -2054,13 +2182,13 @@ mod tests { use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_catalog::MemoryCatalogProviderList; - use datafusion_common::config::Dialect; use datafusion_common::DFSchema; use datafusion_common::Result; + use datafusion_common::config::Dialect; use datafusion_execution::config::SessionConfig; use datafusion_expr::Expr; - use datafusion_optimizer::optimizer::OptimizerRule; use datafusion_optimizer::Optimizer; + use datafusion_optimizer::optimizer::OptimizerRule; use datafusion_physical_plan::display::DisplayableExecutionPlan; use datafusion_sql::planner::{PlannerContext, SqlToRel}; use std::collections::HashMap; @@ -2097,6 +2225,36 @@ mod tests { assert!(sql_to_expr(&state).is_err()) } + #[test] + #[cfg(feature = "sql")] + fn test_create_logical_expr_from_sql_expr() { + let state = SessionStateBuilder::new().with_default_features().build(); + + let provider = SessionContextProvider { + state: &state, + tables: HashMap::new(), + }; + + let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); + let df_schema = DFSchema::try_from(schema).unwrap(); + let dialect = state.config.options().sql_parser.dialect; + let query = SqlToRel::new_with_options(&provider, state.get_parser_options()); + + for sql in ["[1,2,3]", "a > 10", "SUM(a)"] { + let sql_expr = state.sql_to_expr(sql, &dialect).unwrap(); + let from_str = query + .sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new()) + .unwrap(); + + let sql_expr_with_alias = + state.sql_to_expr_with_alias(sql, &dialect).unwrap(); + let from_expr = state + .create_logical_expr_from_sql_expr(sql_expr_with_alias, &df_schema) + .unwrap(); + assert_eq!(from_str, from_expr); + } + } + #[test] fn test_from_existing() -> Result<()> { fn employee_batch() -> RecordBatch { @@ -2137,13 +2295,15 @@ mod tests { .table_exist("employee"); assert!(is_exist); let new_state = SessionStateBuilder::new_from_existing(session_state).build(); - assert!(new_state - .catalog_list() - .catalog(default_catalog.as_str()) - .unwrap() - .schema(default_schema.as_str()) - .unwrap() - .table_exist("employee")); + assert!( + new_state + .catalog_list() + .catalog(default_catalog.as_str()) + .unwrap() + .schema(default_schema.as_str()) + .unwrap() + .table_exist("employee") + ); // if `with_create_default_catalog_and_schema` is disabled, the new one shouldn't create default catalog and schema let disable_create_default = @@ -2151,10 +2311,12 @@ mod tests { let without_default_state = SessionStateBuilder::new() .with_config(disable_create_default) .build(); - assert!(without_default_state - .catalog_list() - .catalog(&default_catalog) - .is_none()); + assert!( + without_default_state + .catalog_list() + .catalog(&default_catalog) + .is_none() + ); let new_state = 
SessionStateBuilder::new_from_existing(without_default_state).build(); assert!(new_state.catalog_list().catalog(&default_catalog).is_none()); diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs index 62a575541a5d8..721710d4e057e 100644 --- a/datafusion/core/src/execution/session_state_defaults.rs +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -17,6 +17,7 @@ use crate::catalog::listing_schema::ListingSchemaProvider; use crate::catalog::{CatalogProvider, TableProviderFactory}; +use crate::datasource::file_format::FileFormatFactory; use crate::datasource::file_format::arrow::ArrowFormatFactory; #[cfg(feature = "avro")] use crate::datasource::file_format::avro::AvroFormatFactory; @@ -24,7 +25,6 @@ use crate::datasource::file_format::csv::CsvFormatFactory; use crate::datasource::file_format::json::JsonFormatFactory; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormatFactory; -use crate::datasource::file_format::FileFormatFactory; use crate::datasource::provider::DefaultTableFactory; use crate::execution::context::SessionState; #[cfg(feature = "nested_expressions")] @@ -103,7 +103,7 @@ impl SessionStateDefaults { /// returns the list of default [`ScalarUDF`]s pub fn default_scalar_functions() -> Vec> { - #[cfg_attr(not(feature = "nested_expressions"), allow(unused_mut))] + #[cfg_attr(not(feature = "nested_expressions"), expect(unused_mut))] let mut functions: Vec> = functions::all_default_functions(); #[cfg(feature = "nested_expressions")] @@ -155,7 +155,7 @@ impl SessionStateDefaults { } /// registers all the builtin array functions - #[cfg_attr(not(feature = "nested_expressions"), allow(unused_variables))] + #[cfg_attr(not(feature = "nested_expressions"), expect(unused_variables))] pub fn register_array_functions(state: &mut SessionState) { // register crate of array expressions (if enabled) #[cfg(feature = "nested_expressions")] diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 381dd5e9e8482..e83934a8e281d 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![deny(clippy::allow_attributes)] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" @@ -35,6 +36,9 @@ ) )] #![warn(missing_docs, clippy::needless_borrow)] +// Use `allow` instead of `expect` for test configuration to explicitly +// disable the lint for all test code rather than expecting violations +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] //! [DataFusion] is an extensible query engine written in Rust that //! uses [Apache Arrow] as its in-memory format. DataFusion's target users are @@ -358,7 +362,7 @@ //! [`TreeNode`]: datafusion_common::tree_node::TreeNode //! [`tree_node module`]: datafusion_expr::logical_plan::tree_node //! [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier -//! [`expr_api`.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs +//! [`expr_api`.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs //! //! ### Physical Plans //! 
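The lint changes above (`#![deny(clippy::allow_attributes)]` in lib.rs and `allow(unused_mut)` becoming `expect(unused_mut)` in session_state_defaults.rs) hinge on the difference between the two attributes; a small illustrative sketch, not taken from the DataFusion code itself:

    // `expect` behaves like `allow`, but the compiler emits
    // `unfulfilled_lint_expectations` if the lint never actually fires,
    // so stale suppressions get flagged.
    #[expect(unused_mut)]
    fn with_unneeded_mut() -> i32 {
        let mut x = 1; // `mut` is unnecessary, so the expectation is fulfilled
        x
    }

    // `allow` silences the lint unconditionally, whether or not it would fire.
    #[allow(unused_variables)]
    fn maybe_unused(flag: bool) {
        let helper = 42;
        if flag {
            println!("{helper}");
        }
    }

    fn main() {
        println!("{}", with_unneeded_mut());
        maybe_unused(true);
    }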
@@ -647,7 +651,7 @@ //! //! [Tokio]: https://tokio.rs //! [`Runtime`]: tokio::runtime::Runtime -//! [thread_pools example]: https://github.com/apache/datafusion/tree/main/datafusion-examples/examples/thread_pools.rs +//! [thread_pools example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/thread_pools.rs //! [`task`]: tokio::task //! [Using Rustlang’s Async Tokio Runtime for CPU-Bound Tasks]: https://thenewstack.io/using-rustlangs-async-tokio-runtime-for-cpu-bound-tasks/ //! [`RepartitionExec`]: physical_plan::repartition::RepartitionExec diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index c280b50a9f07a..9eaf1403e5757 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use crate::datasource::file_format::file_type_to_format; use crate::datasource::listing::ListingTableUrl; use crate::datasource::physical_plan::FileSinkConfig; -use crate::datasource::{source_as_provider, DefaultTableSource}; +use crate::datasource::{DefaultTableSource, source_as_provider}; use crate::error::{DataFusionError, Result}; use crate::execution::context::{ExecutionProps, SessionState}; use crate::logical_expr::utils::generate_sort_key; @@ -52,29 +52,32 @@ use crate::physical_plan::union::UnionExec; use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - displayable, windows, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, - Partitioning, PhysicalExpr, WindowExpr, + ExecutionPlan, ExecutionPlanProperties, InputOrderMode, Partitioning, PhysicalExpr, + WindowExpr, displayable, windows, }; use crate::schema_equivalence::schema_satisfied_by; -use arrow::array::{builder::StringBuilder, RecordBatch}; +use arrow::array::{RecordBatch, builder::StringBuilder}; use arrow::compute::SortOptions; use arrow::datatypes::Schema; +use arrow_schema::Field; use datafusion_catalog::ScanArgs; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::format::ExplainAnalyzeLevel; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; -use datafusion_common::TableReference; use datafusion_common::{ - exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, - ScalarValue, + DFSchema, ScalarValue, exec_err, internal_datafusion_err, internal_err, not_impl_err, + plan_err, +}; +use datafusion_common::{ + TableReference, assert_eq_or_internal_err, assert_or_internal_err, }; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_expr::dml::{CopyTo, InsertOp}; use datafusion_expr::expr::{ - physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, - NullTreatment, WindowFunction, WindowFunctionParams, + AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, NullTreatment, + WindowFunction, WindowFunctionParams, physical_name, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -87,7 +90,7 @@ use datafusion_expr::{ use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::{ - create_physical_sort_exprs, LexOrdering, PhysicalSortExpr, + LexOrdering, PhysicalSortExpr, create_physical_sort_exprs, }; use 
datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::empty::EmptyExec; @@ -101,7 +104,7 @@ use datafusion_physical_plan::unnest::ListUnnest; use async_trait::async_trait; use datafusion_physical_plan::async_func::{AsyncFuncExec, AsyncMapper}; use futures::{StreamExt, TryStreamExt}; -use itertools::{multiunzip, Itertools}; +use itertools::{Itertools, multiunzip}; use log::debug; use tokio::sync::Mutex; @@ -347,11 +350,11 @@ impl DefaultPhysicalPlanner { .flatten() .collect::>(); // Ideally this never happens if we have a valid LogicalPlan tree - if outputs.len() != 1 { - return internal_err!( - "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected" - ); - } + assert_eq_or_internal_err!( + outputs.len(), + 1, + "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected" + ); let plan = outputs.pop().unwrap(); Ok(plan) } @@ -496,7 +499,7 @@ impl DefaultPhysicalPlanner { output_schema, }) => { let output_schema = Arc::clone(output_schema.inner()); - self.plan_describe(Arc::clone(schema), output_schema)? + self.plan_describe(&Arc::clone(schema), output_schema)? } // 1 Child @@ -525,12 +528,22 @@ impl DefaultPhysicalPlanner { let keep_partition_by_columns = match source_option_tuples .get("execution.keep_partition_by_columns") - .map(|v| v.trim()) { - None => session_state.config().options().execution.keep_partition_by_columns, + .map(|v| v.trim()) + { + None => { + session_state + .config() + .options() + .execution + .keep_partition_by_columns + } Some("true") => true, Some("false") => false, - Some(value) => - return Err(DataFusionError::Configuration(format!("provided value for 'execution.keep_partition_by_columns' was not recognized: \"{value}\""))), + Some(value) => { + return Err(DataFusionError::Configuration(format!( + "provided value for 'execution.keep_partition_by_columns' was not recognized: \"{value}\"" + ))); + } }; let sink_format = file_type_to_format(file_type)? @@ -588,17 +601,18 @@ impl DefaultPhysicalPlanner { } } LogicalPlan::Window(Window { window_expr, .. }) => { - if window_expr.is_empty() { - return internal_err!("Impossibly got empty window expression"); - } + assert_or_internal_err!( + !window_expr.is_empty(), + "Impossibly got empty window expression" + ); let input_exec = children.one()?; let get_sort_keys = |expr: &Expr| match expr { Expr::WindowFunction(window_fun) => { let WindowFunctionParams { - ref partition_by, - ref order_by, + partition_by, + order_by, .. } = &window_fun.as_ref().params; generate_sort_key(partition_by, order_by) @@ -608,8 +622,8 @@ impl DefaultPhysicalPlanner { match &**expr { Expr::WindowFunction(window_fun) => { let WindowFunctionParams { - ref partition_by, - ref order_by, + partition_by, + order_by, .. } = &window_fun.as_ref().params; generate_sort_key(partition_by, order_by) @@ -622,11 +636,11 @@ impl DefaultPhysicalPlanner { let sort_keys = get_sort_keys(&window_expr[0])?; if window_expr.len() > 1 { debug_assert!( - window_expr[1..] - .iter() - .all(|expr| get_sort_keys(expr).unwrap() == sort_keys), - "all window expressions shall have the same sort keys, as guaranteed by logical planning" - ); + window_expr[1..] 
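The planner changes above replace hand-written `if ... { return internal_err!(...) }` guards with `assert_eq_or_internal_err!` / `assert_or_internal_err!`, which this file now imports from `datafusion_common`. A small sketch of the pattern, assuming the macros accept the same argument shapes as the call sites in this patch; the helper function is illustrative only:

    use datafusion_common::{Result, assert_eq_or_internal_err, assert_or_internal_err};

    // Hypothetical helper, not part of the patch: validates planner output.
    fn single_root(outputs: &mut Vec<String>) -> Result<String> {
        // Replaces: if outputs.len() != 1 { return internal_err!("...") }
        assert_eq_or_internal_err!(outputs.len(), 1, "More than one root detected");
        assert_or_internal_err!(!outputs[0].is_empty(), "root plan must not be empty");
        Ok(outputs.pop().unwrap())
    }
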
+ .iter() + .all(|expr| get_sort_keys(expr).unwrap() == sort_keys), + "all window expressions shall have the same sort keys, as guaranteed by logical planning" + ); } let logical_schema = node.schema(); @@ -683,6 +697,17 @@ impl DefaultPhysicalPlanner { ) { let mut differences = Vec::new(); + + if physical_input_schema.metadata() + != physical_input_schema_from_logical.metadata() + { + differences.push(format!( + "schema metadata differs: (physical) {:?} vs (logical) {:?}", + physical_input_schema.metadata(), + physical_input_schema_from_logical.metadata() + )); + } + if physical_input_schema.fields().len() != physical_input_schema_from_logical.fields().len() { @@ -712,11 +737,20 @@ impl DefaultPhysicalPlanner { if physical_field.is_nullable() && !logical_field.is_nullable() { differences.push(format!("field nullability at index {} [{}]: (physical) {} vs (logical) {}", i, physical_field.name(), physical_field.is_nullable(), logical_field.is_nullable())); } + if physical_field.metadata() != logical_field.metadata() { + differences.push(format!( + "field metadata at index {} [{}]: (physical) {:?} vs (logical) {:?}", + i, + physical_field.name(), + physical_field.metadata(), + logical_field.metadata() + )); + } } - return internal_err!("Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences - .iter() - .map(|s| format!("\n\t- {s}")) - .join("")); + return internal_err!( + "Physical input schema should be the same as the one converted from logical input schema. Differences: {}", + differences.iter().map(|s| format!("\n\t- {s}")).join("") + ); } let groups = self.create_grouping_physical_expr( @@ -776,7 +810,7 @@ impl DefaultPhysicalPlanner { _ => { return internal_err!( "Unexpected result from try_plan_async_exprs" - ) + ); } } } @@ -850,6 +884,7 @@ impl DefaultPhysicalPlanner { )? { PlanAsyncExpr::Sync(PlannedExprResult::Expr(runtime_expr)) => { FilterExec::try_new(Arc::clone(&runtime_expr[0]), physical_input)? + .with_batch_size(session_state.config().batch_size())? } PlanAsyncExpr::Async( async_map, @@ -868,11 +903,12 @@ impl DefaultPhysicalPlanner { .with_projection(Some( (0..input.schema().fields().len()).collect(), ))? + .with_batch_size(session_state.config().batch_size())? } _ => { return internal_err!( "Unexpected result from try_plan_async_exprs" - ) + ); } }; @@ -1207,7 +1243,7 @@ impl DefaultPhysicalPlanner { let filter_df_fields = filter_df_fields .into_iter() .map(|(qualifier, field)| { - (qualifier.cloned(), Arc::new(field.clone())) + (qualifier.cloned(), Arc::clone(field)) }) .collect(); @@ -1463,19 +1499,24 @@ impl DefaultPhysicalPlanner { } let plan = match maybe_plan { - Some(v) => Ok(v), - _ => plan_err!("No installed planner was able to convert the custom node to an execution plan: {:?}", node) - }?; + Some(v) => Ok(v), + _ => plan_err!( + "No installed planner was able to convert the custom node to an execution plan: {:?}", + node + ), + }?; // Ensure the ExecutionPlan's schema matches the // declared logical schema to catch and warn about // logic errors when creating user defined plans. if !node.schema().matches_arrow_schema(&plan.schema()) { return plan_err!( - "Extension planner for {:?} created an ExecutionPlan with mismatched schema. \ + "Extension planner for {:?} created an ExecutionPlan with mismatched schema. 
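The new metadata comparisons above extend the planner's `differences` report so that schema-level and field-level metadata mismatches are named explicitly. A self-contained sketch of the same field-by-field diffing idea over plain Arrow schemas, assuming only the public `arrow-schema` API:

    use std::collections::HashMap;
    use arrow_schema::{DataType, Field, Schema};

    // Collect human-readable differences, mirroring the checks above.
    fn schema_differences(physical: &Schema, logical: &Schema) -> Vec<String> {
        let mut differences = Vec::new();
        if physical.metadata() != logical.metadata() {
            differences.push("schema metadata differs".to_string());
        }
        for (i, (p, l)) in physical
            .fields()
            .iter()
            .zip(logical.fields().iter())
            .enumerate()
        {
            if p.name() != l.name() {
                differences.push(format!("field name at index {i}"));
            }
            if p.data_type() != l.data_type() {
                differences.push(format!("field data type at index {i}"));
            }
            if p.metadata() != l.metadata() {
                differences.push(format!("field metadata at index {i}"));
            }
        }
        differences
    }

    fn main() {
        let logical = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
        let physical = Schema::new(vec![Field::new("c1", DataType::Int64, false)])
            .with_metadata(HashMap::from([("key".to_string(), "value".to_string())]));
        // One schema-level and one field-level difference.
        assert_eq!(schema_differences(&physical, &logical).len(), 2);
    }
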
\ LogicalPlan schema: {:?}, ExecutionPlan schema: {:?}", - node, node.schema(), plan.schema() - ); + node, + node.schema(), + plan.schema() + ); } else { plan } @@ -1502,17 +1543,17 @@ impl DefaultPhysicalPlanner { LogicalPlan::Explain(_) => { return internal_err!( "Unsupported logical plan: Explain must be root of the plan" - ) + ); } LogicalPlan::Distinct(_) => { return internal_err!( "Unsupported logical plan: Distinct should be replaced to Aggregate" - ) + ); } LogicalPlan::Analyze(_) => { return internal_err!( "Unsupported logical plan: Analyze must be root of the plan" - ) + ); } }; Ok(exec_node) @@ -1556,7 +1597,8 @@ impl DefaultPhysicalPlanner { } } else if group_expr.is_empty() { // No GROUP BY clause - create empty PhysicalGroupBy - Ok(PhysicalGroupBy::new(vec![], vec![], vec![])) + // no expressions, no null expressions and no grouping expressions + Ok(PhysicalGroupBy::new(vec![], vec![], vec![], false)) } else { Ok(PhysicalGroupBy::new_single( group_expr @@ -1628,6 +1670,7 @@ fn merge_grouping_set_physical_expr( grouping_set_expr, null_exprs, merged_sets, + true, )) } @@ -1670,7 +1713,7 @@ fn create_cube_physical_expr( } } - Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups)) + Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups, true)) } /// Expand and align a ROLLUP expression. This is a special case of GROUPING SETS @@ -1715,7 +1758,7 @@ fn create_rollup_physical_expr( groups.push(group) } - Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups)) + Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups, true)) } /// For a given logical expr, get a properly typed NULL ScalarValue physical expression @@ -1752,11 +1795,11 @@ fn qualify_join_schema_sides( let join_fields = join_schema.fields(); // Validate lengths - if join_fields.len() != left_fields.len() + right_fields.len() { - return internal_err!( - "Join schema field count must match left and right field count." - ); - } + assert_eq_or_internal_err!( + join_fields.len(), + left_fields.len() + right_fields.len(), + "Join schema field count must match left and right field count." + ); // Validate field names match for (i, (field, expected)) in join_fields @@ -1764,14 +1807,12 @@ fn qualify_join_schema_sides( .zip(left_fields.iter().chain(right_fields.iter())) .enumerate() { - if field.name() != expected.name() { - return internal_err!( - "Field name mismatch at index {}: expected '{}', found '{}'", - i, - expected.name(), - field.name() - ); - } + assert_eq_or_internal_err!( + field.name(), + expected.name(), + "Field name mismatch at index {}", + i + ); } // qualify sides @@ -1858,9 +1899,10 @@ pub fn create_window_expr_with_name( if !is_window_frame_bound_valid(window_frame) { return plan_err!( - "Invalid window frame: start bound ({}) cannot be larger than end bound ({})", - window_frame.start_bound, window_frame.end_bound - ); + "Invalid window frame: start bound ({}) cannot be larger than end bound ({})", + window_frame.start_bound, + window_frame.end_bound + ); } let window_frame = Arc::new(window_frame.clone()); @@ -2243,6 +2285,7 @@ impl DefaultPhysicalPlanner { /// Optimize a physical plan by applying each physical optimizer, /// calling observer(plan, optimizer after each one) + #[expect(clippy::needless_pass_by_value)] pub fn optimize_physical_plan( &self, plan: Arc, @@ -2277,7 +2320,7 @@ impl DefaultPhysicalPlanner { // This only checks the schema in release build, and performs additional checks in debug mode. 
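The switch from `Arc::new(field.clone())` to `Arc::clone(field)` above avoids deep-copying a field that is already reference-counted. A tiny standard-library-only sketch of the difference:

    use std::sync::Arc;

    fn main() {
        let field = Arc::new(String::from("c1"));
        let shared = Arc::clone(&field);         // bumps the refcount, no copy
        let copied = Arc::new((*field).clone()); // allocates a second String
        assert!(Arc::ptr_eq(&field, &shared));
        assert!(!Arc::ptr_eq(&field, &copied));
    }
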
OptimizationInvariantChecker::new(optimizer) - .check(&new_plan, before_schema)?; + .check(&new_plan, &before_schema)?; debug!( "Optimized physical plan by {}:\n{}\n", @@ -2310,7 +2353,7 @@ impl DefaultPhysicalPlanner { // return an record_batch which describes a table's schema. fn plan_describe( &self, - table_schema: Arc, + table_schema: &Arc, output_schema: Arc, ) -> Result> { let mut column_names = StringBuilder::new(); @@ -2513,11 +2556,14 @@ impl<'a> OptimizationInvariantChecker<'a> { pub fn check( &mut self, plan: &Arc, - previous_schema: Arc, + previous_schema: &Arc, ) -> Result<()> { // if the rule is not permitted to change the schema, confirm that it did not change. - if self.rule.schema_check() && plan.schema() != previous_schema { - internal_err!("PhysicalOptimizer rule '{}' failed. Schema mismatch. Expected original schema: {:?}, got new schema: {:?}", + if self.rule.schema_check() + && !is_allowed_schema_change(previous_schema.as_ref(), plan.schema().as_ref()) + { + internal_err!( + "PhysicalOptimizer rule '{}' failed. Schema mismatch. Expected original schema: {:?}, got new schema: {:?}", self.rule.name(), previous_schema, plan.schema() @@ -2532,6 +2578,38 @@ impl<'a> OptimizationInvariantChecker<'a> { } } +/// Checks if the change from `old` schema to `new` is allowed or not. +/// +/// The current implementation only allows nullability of individual fields to change +/// from 'nullable' to 'not nullable'. This can happen due to physical expressions knowing +/// more about their null-ness than their logical counterparts. +/// This change is allowed because for any field the non-nullable domain `F` is a strict subset +/// of the nullable domain `F ∪ { NULL }`. A physical schema that guarantees a stricter subset +/// of values will not violate any assumptions made based on the less strict schema. 
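Several signatures above (for example `plan_describe` and `OptimizationInvariantChecker::check`) now borrow the schema `Arc` instead of taking it by value; the generic parameters are elided in this rendering of the patch, so the sketch below uses a placeholder payload type and shows only the calling convention:

    use std::sync::Arc;

    // Hypothetical stand-in for the schema parameter; not code from the patch.
    fn describe(schema: &Arc<Vec<String>>) -> usize {
        schema.len() // a borrow is enough; no refcount bump or clone at the call site
    }

    fn main() {
        let schema = Arc::new(vec!["c1".to_string(), "c2".to_string()]);
        assert_eq!(describe(&schema), 2);
        assert_eq!(Arc::strong_count(&schema), 1); // caller still owns the only Arc
    }
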
+fn is_allowed_schema_change(old: &Schema, new: &Schema) -> bool { + if new.metadata != old.metadata { + return false; + } + + if new.fields.len() != old.fields.len() { + return false; + } + + let new_fields = new.fields.iter().map(|f| f.as_ref()); + let old_fields = old.fields.iter().map(|f| f.as_ref()); + old_fields + .zip(new_fields) + .all(|(old, new)| is_allowed_field_change(old, new)) +} + +fn is_allowed_field_change(old_field: &Field, new_field: &Field) -> bool { + new_field.name() == old_field.name() + && new_field.data_type() == old_field.data_type() + && new_field.metadata() == old_field.metadata() + && (new_field.is_nullable() == old_field.is_nullable() + || !new_field.is_nullable()) +} + impl<'n> TreeNodeVisitor<'n> for OptimizationInvariantChecker<'_> { type Node = Arc; @@ -2580,11 +2658,11 @@ mod tests { use std::ops::{BitAnd, Not}; use super::*; - use crate::datasource::file_format::options::CsvReadOptions; use crate::datasource::MemTable; + use crate::datasource::file_format::options::CsvReadOptions; use crate::physical_plan::{ - expressions, DisplayAs, DisplayFormatType, PlanProperties, - SendableRecordBatchStream, + DisplayAs, DisplayFormatType, PlanProperties, SendableRecordBatchStream, + expressions, }; use crate::prelude::{SessionConfig, SessionContext}; use crate::test_util::{scan_empty, scan_empty_with_partitions}; @@ -2595,12 +2673,12 @@ mod tests { use arrow_schema::SchemaRef; use datafusion_common::config::ConfigOptions; use datafusion_common::{ - assert_contains, DFSchemaRef, TableReference, ToDFSchema as _, + DFSchemaRef, TableReference, ToDFSchema as _, assert_contains, }; - use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; + use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::builder::subquery_alias; - use datafusion_expr::{col, lit, LogicalPlanBuilder, UserDefinedLogicalNodeCore}; + use datafusion_expr::{LogicalPlanBuilder, UserDefinedLogicalNodeCore, col, lit}; use datafusion_functions_aggregate::count::count_all; use datafusion_functions_aggregate::expr_fn::sum; use datafusion_physical_expr::EquivalenceProperties; @@ -2773,6 +2851,7 @@ mod tests { true, ], ], + has_grouping_set: true, }, ) "#); @@ -2883,6 +2962,7 @@ mod tests { false, ], ], + has_grouping_set: true, }, ) "#); @@ -3000,8 +3080,7 @@ mod tests { .create_physical_plan(&logical_plan, &session_state) .await; - let expected_error = - "No installed planner was able to convert the custom node to an execution plan: NoOp"; + let expected_error = "No installed planner was able to convert the custom node to an execution plan: NoOp"; match plan { Ok(_) => panic!("Expected planning failure"), Err(e) => assert!( @@ -3067,7 +3146,7 @@ mod tests { assert_contains!( &e, - r#"Error during planning: Can not find compatible types to compare Boolean with [Struct("foo": Boolean), Utf8]"# + r#"Error during planning: Can not find compatible types to compare Boolean with [Struct("foo": non-null Boolean), Utf8]"# ); Ok(()) @@ -3258,18 +3337,27 @@ mod tests { if let Some(plan) = plan.as_any().downcast_ref::() { let stringified_plans = plan.stringified_plans(); assert!(stringified_plans.len() >= 4); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::FinalLogicalPlan))); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::InitialPhysicalPlan))); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::OptimizedPhysicalPlan { .. 
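A short, self-contained exercise of the rule `is_allowed_field_change` encodes above: nullability may only be tightened (nullable to non-nullable), never loosened. The helper below reduces the check to name, type, and nullability for illustration:

    use arrow_schema::{DataType, Field};

    // Same nullability clause as `is_allowed_field_change` above.
    fn narrowing_ok(old: &Field, new: &Field) -> bool {
        new.name() == old.name()
            && new.data_type() == old.data_type()
            && (new.is_nullable() == old.is_nullable() || !new.is_nullable())
    }

    fn main() {
        let nullable = Field::new("a", DataType::Int32, true);
        let non_nullable = Field::new("a", DataType::Int32, false);
        assert!(narrowing_ok(&nullable, &non_nullable));  // physical plan proved non-null: allowed
        assert!(!narrowing_ok(&non_nullable, &nullable)); // weakening the guarantee: rejected
    }
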
}))); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::FinalPhysicalPlan))); + assert!( + stringified_plans + .iter() + .any(|p| matches!(p.plan_type, PlanType::FinalLogicalPlan)) + ); + assert!( + stringified_plans + .iter() + .any(|p| matches!(p.plan_type, PlanType::InitialPhysicalPlan)) + ); + assert!( + stringified_plans.iter().any(|p| matches!( + p.plan_type, + PlanType::OptimizedPhysicalPlan { .. } + )) + ); + assert!( + stringified_plans + .iter() + .any(|p| matches!(p.plan_type, PlanType::FinalPhysicalPlan)) + ); } else { panic!( "Plan was not an explain plan: {}", @@ -3636,8 +3724,12 @@ digraph { } fn check_invariants(&self, check: InvariantLevel) -> Result<()> { match check { - InvariantLevel::Always => plan_err!("extension node failed it's user-defined always-invariant check"), - InvariantLevel::Executable => panic!("the OptimizationInvariantChecker should not be checking for executableness"), + InvariantLevel::Always => plan_err!( + "extension node failed it's user-defined always-invariant check" + ), + InvariantLevel::Executable => panic!( + "the OptimizationInvariantChecker should not be checking for executableness" + ), } } fn schema(&self) -> SchemaRef { @@ -3706,24 +3798,26 @@ digraph { // Test: check should pass with same schema let equal_schema = ok_plan.schema(); - OptimizationInvariantChecker::new(&rule).check(&ok_plan, equal_schema)?; + OptimizationInvariantChecker::new(&rule).check(&ok_plan, &equal_schema)?; // Test: should fail with schema changed let different_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Boolean, false)])); let expected_err = OptimizationInvariantChecker::new(&rule) - .check(&ok_plan, different_schema) + .check(&ok_plan, &different_schema) .unwrap_err(); assert!(expected_err.to_string().contains("PhysicalOptimizer rule 'OptimizerRuleWithSchemaCheck' failed. Schema mismatch. Expected original schema")); // Test: should fail when extension node fails it's own invariant check let failing_node: Arc = Arc::new(InvariantFailsExtensionNode); let expected_err = OptimizationInvariantChecker::new(&rule) - .check(&failing_node, ok_plan.schema()) + .check(&failing_node, &ok_plan.schema()) .unwrap_err(); - assert!(expected_err - .to_string() - .contains("extension node failed it's user-defined always-invariant check")); + assert!( + expected_err.to_string().contains( + "extension node failed it's user-defined always-invariant check" + ) + ); // Test: should fail when descendent extension node fails let failing_node: Arc = Arc::new(InvariantFailsExtensionNode); @@ -3732,11 +3826,13 @@ digraph { Arc::clone(&child), ])?; let expected_err = OptimizationInvariantChecker::new(&rule) - .check(&invalid_plan, ok_plan.schema()) + .check(&invalid_plan, &ok_plan.schema()) .unwrap_err(); - assert!(expected_err - .to_string() - .contains("extension node failed it's user-defined always-invariant check")); + assert!( + expected_err.to_string().contains( + "extension node failed it's user-defined always-invariant check" + ) + ); Ok(()) } @@ -3879,4 +3975,229 @@ digraph { Ok(()) } + + // --- Tests for aggregate schema mismatch error messages --- + + use crate::catalog::TableProvider; + use datafusion_catalog::Session; + use datafusion_expr::TableType; + + /// A TableProvider that returns schemas for logical planning vs physical planning. + /// Used to test schema mismatch error messages. 
+ #[derive(Debug)] + struct MockSchemaTableProvider { + logical_schema: SchemaRef, + physical_schema: SchemaRef, + } + + #[async_trait] + impl TableProvider for MockSchemaTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.logical_schema) + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + _projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> Result> { + Ok(Arc::new(NoOpExecutionPlan::new(Arc::clone( + &self.physical_schema, + )))) + } + } + + /// Attempts to plan a query with potentially mismatched schemas. + async fn plan_with_schemas( + logical_schema: SchemaRef, + physical_schema: SchemaRef, + query: &str, + ) -> Result> { + let provider = MockSchemaTableProvider { + logical_schema, + physical_schema, + }; + let ctx = SessionContext::new(); + ctx.register_table("test", Arc::new(provider)).unwrap(); + + ctx.sql(query).await.unwrap().create_physical_plan().await + } + + #[tokio::test] + // When schemas match, planning proceeds past the schema_satisfied_by check. + // It then panics on unimplemented error in NoOpExecutionPlan. + #[should_panic(expected = "NoOpExecutionPlan")] + async fn test_aggregate_schema_check_passes() { + let schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + + plan_with_schemas( + Arc::clone(&schema), + schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap(); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_metadata() { + let logical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + let physical_schema = Arc::new( + Schema::new(vec![Field::new("c1", DataType::Int32, false)]) + .with_metadata(HashMap::from([("key".into(), "value".into())])), + ); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + assert_contains!(err.to_string(), "schema metadata differs"); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_field_count() { + let logical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + let physical_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, false), + Field::new("c2", DataType::Int32, false), + ])); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + assert_contains!(err.to_string(), "Different number of fields"); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_field_name() { + let logical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + let physical_schema = Arc::new(Schema::new(vec![Field::new( + "different_name", + DataType::Int32, + false, + )])); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + assert_contains!(err.to_string(), "field name at index"); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_field_type() { + let logical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + let physical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int64, false)])); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + assert_contains!(err.to_string(), "field 
data type at index"); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_field_nullability() { + let logical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + let physical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, true)])); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + assert_contains!(err.to_string(), "field nullability at index"); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_field_metadata() { + let logical_schema = + Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); + let physical_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, false) + .with_metadata(HashMap::from([("key".into(), "value".into())])), + ])); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + assert_contains!(err.to_string(), "field metadata at index"); + } + + #[tokio::test] + async fn test_aggregate_schema_mismatch_multiple() { + let logical_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, false), + Field::new("c2", DataType::Utf8, false), + ])); + let physical_schema = Arc::new( + Schema::new(vec![ + Field::new("c1", DataType::Int64, true) + .with_metadata(HashMap::from([("key".into(), "value".into())])), + Field::new("c2", DataType::Utf8, false), + ]) + .with_metadata(HashMap::from([( + "schema_key".into(), + "schema_value".into(), + )])), + ); + + let err = plan_with_schemas( + logical_schema, + physical_schema, + "SELECT count(*) FROM test GROUP BY c1", + ) + .await + .unwrap_err(); + + // Verify all applicable error fragments are present + let err_str = err.to_string(); + assert_contains!(&err_str, "schema metadata differs"); + assert_contains!(&err_str, "field data type at index"); + assert_contains!(&err_str, "field nullability at index"); + assert_contains!(&err_str, "field metadata at index"); + } } diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs index d723620d32323..50e4a2649c923 100644 --- a/datafusion/core/src/prelude.rs +++ b/datafusion/core/src/prelude.rs @@ -34,10 +34,10 @@ pub use crate::execution::options::{ pub use datafusion_common::Column; pub use datafusion_expr::{ + Expr, expr_fn::*, lit, lit_timestamp_nano, logical_plan::{JoinType, Partitioning}, - Expr, }; pub use datafusion_functions::expr_fn::*; #[cfg(feature = "nested_expressions")] diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 68f83e7f1f115..717182f1d3d5b 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -25,9 +25,9 @@ use std::io::{BufReader, BufWriter}; use std::path::Path; use std::sync::Arc; +use crate::datasource::file_format::FileFormat; use crate::datasource::file_format::csv::CsvFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; -use crate::datasource::file_format::FileFormat; use crate::datasource::physical_plan::CsvSource; use crate::datasource::{MemTable, TableProvider}; @@ -35,28 +35,31 @@ use crate::error::Result; use crate::logical_expr::LogicalPlan; use crate::test_util::{aggr_test_schema, arrow_test_data}; +use datafusion_common::config::CsvOptions; + use arrow::array::{self, Array, ArrayRef, Decimal128Builder, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; #[cfg(feature = 
"compression")] use datafusion_common::DataFusionError; +use datafusion_datasource::TableSchema; use datafusion_datasource::source::DataSourceExec; -#[cfg(feature = "compression")] -use bzip2::write::BzEncoder; #[cfg(feature = "compression")] use bzip2::Compression as BzCompression; +#[cfg(feature = "compression")] +use bzip2::write::BzEncoder; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource_csv::partitioned_csv_config; #[cfg(feature = "compression")] +use flate2::Compression as GzCompression; +#[cfg(feature = "compression")] use flate2::write::GzEncoder; #[cfg(feature = "compression")] -use flate2::Compression as GzCompression; +use liblzma::write::XzEncoder; use object_store::local_unpartitioned_file; #[cfg(feature = "compression")] -use xz2::write::XzEncoder; -#[cfg(feature = "compression")] use zstd::Encoder as ZstdEncoder; pub fn create_table_dual() -> Arc { @@ -84,17 +87,26 @@ pub fn scan_partitioned_csv( let schema = aggr_test_schema(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv", arrow_test_data()); + let csv_format: Arc = Arc::new(CsvFormat::default()); + let file_groups = partitioned_file_groups( path.as_str(), filename, partitions, - Arc::new(CsvFormat::default()), + &csv_format, FileCompressionType::UNCOMPRESSED, work_dir, )?; - let source = Arc::new(CsvSource::new(true, b'"', b'"')); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(schema); + let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(schema, file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) 
.with_file_compression_type(FileCompressionType::UNCOMPRESSED) .build(); Ok(DataSourceExec::from_data_source(config)) @@ -105,7 +117,7 @@ pub fn partitioned_file_groups( path: &str, filename: &str, partitions: usize, - file_format: Arc, + file_format: &Arc, file_compression_type: FileCompressionType, work_dir: &Path, ) -> Result> { @@ -189,7 +201,7 @@ pub fn partitioned_file_groups( .collect::>()) } -pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) { +pub fn assert_fields_eq(plan: &LogicalPlan, expected: &[&str]) { let actual: Vec = plan .schema() .fields() diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs index d31c2719973ec..a0438e3d74ab2 100644 --- a/datafusion/core/src/test/object_store.rs +++ b/datafusion/core/src/test/object_store.rs @@ -20,20 +20,20 @@ use crate::{ execution::{context::SessionState, session_state::SessionStateBuilder}, object_store::{ - memory::InMemory, path::Path, Error, GetOptions, GetResult, ListResult, - MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, - PutPayload, PutResult, + Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, + memory::InMemory, path::Path, }, prelude::SessionContext, }; -use futures::{stream::BoxStream, FutureExt}; +use futures::{FutureExt, stream::BoxStream}; use std::{ fmt::{Debug, Display, Formatter}, sync::Arc, }; use tokio::{ sync::Barrier, - time::{timeout, Duration}, + time::{Duration, timeout}, }; use url::Url; diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 7149c5b0bd8ca..466ee38a426fd 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -25,6 +25,7 @@ pub mod csv; use futures::Stream; use std::any::Any; use std::collections::HashMap; +use std::fmt::Formatter; use std::fs::File; use std::io::Write; use std::path::Path; @@ -36,16 +37,20 @@ use crate::dataframe::DataFrame; use crate::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use crate::datasource::{empty::EmptyTable, provider_as_source}; use crate::error::Result; +use crate::execution::session_state::CacheFactory; use crate::logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE}; use crate::physical_plan::ExecutionPlan; use crate::prelude::{CsvReadOptions, SessionContext}; -use crate::execution::SendableRecordBatchStream; +use crate::execution::{SendableRecordBatchStream, SessionState, SessionStateBuilder}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_catalog::Session; -use datafusion_common::TableReference; -use datafusion_expr::{CreateExternalTable, Expr, SortExpr, TableType}; +use datafusion_common::{DFSchemaRef, TableReference}; +use datafusion_expr::{ + CreateExternalTable, Expr, LogicalPlan, SortExpr, TableType, + UserDefinedLogicalNodeCore, +}; use std::pin::Pin; use async_trait::async_trait; @@ -282,3 +287,67 @@ impl RecordBatchStream for BoundedStream { self.record_batch.schema() } } + +#[derive(Hash, Eq, PartialEq, PartialOrd, Debug)] +struct CacheNode { + input: LogicalPlan, +} + +impl UserDefinedLogicalNodeCore for CacheNode { + fn name(&self) -> &str { + "CacheNode" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + self.input.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { 
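`assert_fields_eq` above now takes `&[&str]` rather than an owned `Vec<&str>`, so call sites can pass a borrowed literal slice. A minimal sketch of the same signature change with a hypothetical helper:

    // Hypothetical helper, not the one in the patch.
    fn assert_names_eq(actual: &[String], expected: &[&str]) {
        let actual: Vec<&str> = actual.iter().map(String::as_str).collect();
        assert_eq!(actual, expected);
    }

    fn main() {
        let actual = vec!["c1".to_string(), "c2".to_string()];
        assert_names_eq(&actual, &["c1", "c2"]); // no Vec allocation at the call site
    }
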
+ write!(f, "CacheNode") + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> Result { + assert_eq!(inputs.len(), 1, "input size inconsistent"); + Ok(Self { + input: inputs[0].clone(), + }) + } +} + +#[derive(Debug)] +struct TestCacheFactory {} + +impl CacheFactory for TestCacheFactory { + fn create( + &self, + plan: LogicalPlan, + _session_state: &SessionState, + ) -> Result { + Ok(LogicalPlan::Extension(datafusion_expr::Extension { + node: Arc::new(CacheNode { input: plan }), + })) + } +} + +/// Create a test table registered to a session context with an associated cache factory +pub async fn test_table_with_cache_factory() -> Result { + let session_state = SessionStateBuilder::new() + .with_cache_factory(Some(Arc::new(TestCacheFactory {}))) + .build(); + let ctx = SessionContext::new_with_state(session_state); + let name = "aggregate_test_100"; + register_aggregate_csv(&ctx, name).await?; + ctx.table(name).await +} diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 203d9e97d2a8c..44e884c23a681 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -32,17 +32,15 @@ use crate::logical_expr::execution_props::ExecutionProps; use crate::logical_expr::simplify::SimplifyContext; use crate::optimizer::simplify_expressions::ExprSimplifier; use crate::physical_expr::create_physical_expr; +use crate::physical_plan::ExecutionPlan; use crate::physical_plan::filter::FilterExec; use crate::physical_plan::metrics::MetricsSet; -use crate::physical_plan::ExecutionPlan; use crate::prelude::{Expr, SessionConfig, SessionContext}; -use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; -use datafusion_datasource::TableSchema; -use object_store::path::Path; use object_store::ObjectMeta; +use object_store::path::Path; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; @@ -157,20 +155,21 @@ impl TestParquetFile { maybe_filter: Option, ) -> Result> { let parquet_options = ctx.copied_table_options().parquet; - let source = Arc::new(ParquetSource::new(parquet_options.clone())); - let scan_config_builder = FileScanConfigBuilder::new( - self.object_store_url.clone(), - Arc::clone(&self.schema), - source, - ) - .with_file(PartitionedFile { - object_meta: self.object_meta.clone(), - partition_values: vec![], - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }); + let source = Arc::new( + ParquetSource::new(Arc::clone(&self.schema)) + .with_table_parquet_options(parquet_options.clone()), + ); + let scan_config_builder = + FileScanConfigBuilder::new(self.object_store_url.clone(), source).with_file( + PartitionedFile { + object_meta: self.object_meta.clone(), + partition_values: vec![], + range: None, + statistics: None, + extensions: None, + metadata_size_hint: None, + }, + ); let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?; @@ -184,10 +183,10 @@ impl TestParquetFile { create_physical_expr(&filter, &df_schema, &ExecutionProps::default())?; let source = Arc::new( - ParquetSource::new(parquet_options) + ParquetSource::new(Arc::clone(&self.schema)) + .with_table_parquet_options(parquet_options) .with_predicate(Arc::clone(&physical_filter_expr)), - ) - .with_schema(TableSchema::from_file_schema(Arc::clone(&self.schema))); + ); let config = scan_config_builder.with_source(source).build(); let parquet_exec = 
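`TestCacheFactory` above wraps the table's plan in a `CacheNode` extension. If downstream code needs to detect that wrapper, the usual pattern is to match `LogicalPlan::Extension` and inspect the node; a hedged sketch (the `is_cache_node` helper is illustrative, not part of the patch):

    use datafusion_expr::LogicalPlan;

    // Illustrative helper: true if the plan root is the CacheNode wrapper above.
    fn is_cache_node(plan: &LogicalPlan) -> bool {
        matches!(plan, LogicalPlan::Extension(ext) if ext.node.name() == "CacheNode")
    }
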
DataSourceExec::from_data_source(config); @@ -204,13 +203,12 @@ impl TestParquetFile { /// Recursively searches for DataSourceExec and returns the metrics /// on the first one it finds pub fn parquet_metrics(plan: &Arc) -> Option { - if let Some(data_source_exec) = plan.as_any().downcast_ref::() { - if data_source_exec + if let Some(data_source_exec) = plan.as_any().downcast_ref::() + && data_source_exec .downcast_to_file_source::() .is_some() - { - return data_source_exec.metrics(); - } + { + return data_source_exec.metrics(); } for child in plan.children() { diff --git a/datafusion/core/tests/catalog/memory.rs b/datafusion/core/tests/catalog/memory.rs index 06ed141b2e8bd..5258f3bf97574 100644 --- a/datafusion/core/tests/catalog/memory.rs +++ b/datafusion/core/tests/catalog/memory.rs @@ -116,10 +116,12 @@ async fn test_mem_provider() { assert!(provider.deregister_table(table_name).unwrap().is_none()); let test_table = EmptyTable::new(Arc::new(Schema::empty())); // register table successfully - assert!(provider - .register_table(table_name.to_string(), Arc::new(test_table)) - .unwrap() - .is_none()); + assert!( + provider + .register_table(table_name.to_string(), Arc::new(test_table)) + .unwrap() + .is_none() + ); assert!(provider.table_exist(table_name)); let other_table = EmptyTable::new(Arc::new(Schema::empty())); let result = provider.register_table(table_name.to_string(), Arc::new(other_table)); diff --git a/datafusion/core/tests/catalog_listing/mod.rs b/datafusion/core/tests/catalog_listing/mod.rs new file mode 100644 index 0000000000000..cb6cac4fb0672 --- /dev/null +++ b/datafusion/core/tests/catalog_listing/mod.rs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod pruned_partition_list; diff --git a/datafusion/core/tests/catalog_listing/pruned_partition_list.rs b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs new file mode 100644 index 0000000000000..f4782ee13c24d --- /dev/null +++ b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
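`parquet_metrics` above now uses a let-chain (`if let ... && ...`), which flattens the previous nested `if let` plus inner `if`. A standalone sketch of the construct, which requires the 2024 edition the crate now targets:

    fn first_even(values: &[&str]) -> Option<i64> {
        for v in values {
            // One `if` holds both the pattern match and the follow-up condition.
            if let Ok(n) = v.parse::<i64>()
                && n % 2 == 0
            {
                return Some(n);
            }
        }
        None
    }

    fn main() {
        assert_eq!(first_even(&["x", "3", "4"]), Some(4));
    }
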
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_schema::DataType; +use futures::{FutureExt, StreamExt as _, TryStreamExt as _}; +use object_store::{ObjectStore as _, memory::InMemory, path::Path}; + +use datafusion::execution::SessionStateBuilder; +use datafusion_catalog_listing::helpers::{ + describe_partition, list_partitions, pruned_partition_list, +}; +use datafusion_common::ScalarValue; +use datafusion_datasource::ListingTableUrl; +use datafusion_expr::{Expr, col, lit}; +use datafusion_session::Session; + +#[tokio::test] +async fn test_pruned_partition_list_empty() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/mypartition=val1/notparquetfile", 100), + ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), + ("tablepath/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter], + ".parquet", + &[(String::from("mypartition"), DataType::Utf8)], + ) + .await + .expect("partition pruning failed") + .collect::>() + .await; + + assert_eq!(pruned.len(), 0); +} + +#[tokio::test] +async fn test_pruned_partition_list() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/mypartition=val1/file.parquet", 100), + ("tablepath/mypartition=val2/file.parquet", 100), + ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), + ("tablepath/mypartition=val1/other=val3/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter], + ".parquet", + &[(String::from("mypartition"), DataType::Utf8)], + ) + .await + .expect("partition pruning failed") + .try_collect::>() + .await + .unwrap(); + + assert_eq!(pruned.len(), 2); + let f1 = &pruned[0]; + assert_eq!( + f1.object_meta.location.as_ref(), + "tablepath/mypartition=val1/file.parquet" + ); + assert_eq!(&f1.partition_values, &[ScalarValue::from("val1")]); + let f2 = &pruned[1]; + assert_eq!( + f2.object_meta.location.as_ref(), + "tablepath/mypartition=val1/other=val3/file.parquet" + ); + assert_eq!(f2.partition_values, &[ScalarValue::from("val1"),]); +} + +#[tokio::test] +async fn test_pruned_partition_list_multi() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), + ]); + let filter1 = Expr::eq(col("part1"), lit("p1v2")); + let filter2 = Expr::eq(col("part2"), lit("p2v1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter1, filter2], + ".parquet", + &[ + (String::from("part1"), DataType::Utf8), + (String::from("part2"), DataType::Utf8), + ], + ) + .await + .expect("partition pruning failed") + .try_collect::>() + .await + .unwrap(); + + assert_eq!(pruned.len(), 2); + let f1 = &pruned[0]; + assert_eq!( + 
f1.object_meta.location.as_ref(), + "tablepath/part1=p1v2/part2=p2v1/file1.parquet" + ); + assert_eq!( + &f1.partition_values, + &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1"),] + ); + let f2 = &pruned[1]; + assert_eq!( + f2.object_meta.location.as_ref(), + "tablepath/part1=p1v2/part2=p2v1/file2.parquet" + ); + assert_eq!( + &f2.partition_values, + &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1")] + ); +} + +#[tokio::test] +async fn test_list_partition() { + let (store, _) = make_test_store_and_state(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file3.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file4.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/empty.parquet", 0), + ]); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 0, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec![]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ] + ); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 1, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v2/part2=p2v1", 2, vec![]), + ("tablepath/part1=p1v2/part2=p2v2", 2, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ("tablepath/part1=p1v3/part2=p2v1", 2, vec![]), + ] + ); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 2, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ( + "tablepath/part1=p1v2/part2=p2v1", + 2, + vec!["file1.parquet", "file2.parquet"] + ), + ("tablepath/part1=p1v2/part2=p2v2", 2, vec!["file4.parquet"]), + ("tablepath/part1=p1v3/part2=p2v1", 2, vec!["file3.parquet"]), + ] + ); +} + +pub fn make_test_store_and_state( + files: &[(&str, u64)], +) -> (Arc, Arc) { + let memory = InMemory::new(); + + for (name, size) in files { + memory + .put(&Path::from(*name), vec![0; *size as usize].into()) + .now_or_never() + .unwrap() + .unwrap(); + } + + let state = SessionStateBuilder::new().build(); + (Arc::new(memory), Arc::new(state)) +} diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs index 976597c8a9ac5..6375d4e25d8eb 100644 --- a/datafusion/core/tests/config_from_env.rs +++ b/datafusion/core/tests/config_from_env.rs @@ -20,35 +20,43 @@ use std::env; #[test] fn from_env() { - // Note: these must be a single test to avoid interference from concurrent execution - let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; - // valid testing in different cases - for bool_option in ["true", "TRUE", "True", "tRUe"] { - env::set_var(env_key, bool_option); - let config = ConfigOptions::from_env().unwrap(); - env::remove_var(env_key); - 
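The new pruning tests above rely on the hive-style layout in which each `key=value` directory segment between the table root and the file name carries one partition value. A small sketch of that convention, independent of the listing code (the parser is illustrative only):

    // Illustrative parser for hive-style partition paths.
    fn partition_values(path: &str) -> Vec<(String, String)> {
        path.split('/')
            .filter_map(|segment| segment.split_once('='))
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect()
    }

    fn main() {
        assert_eq!(
            partition_values("tablepath/part1=p1v2/part2=p2v1/file1.parquet"),
            vec![
                ("part1".to_string(), "p1v2".to_string()),
                ("part2".to_string(), "p2v1".to_string()),
            ]
        );
    }
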
assert!(config.optimizer.filter_null_join_keys); - } + unsafe { + // Note: these must be a single test to avoid interference from concurrent execution + let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; + // valid testing in different cases + for bool_option in ["true", "TRUE", "True", "tRUe"] { + env::set_var(env_key, bool_option); + let config = ConfigOptions::from_env().unwrap(); + env::remove_var(env_key); + assert!(config.optimizer.filter_null_join_keys); + } - // invalid testing - env::set_var(env_key, "ttruee"); - let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`"); - env::remove_var(env_key); + // invalid testing + env::set_var(env_key, "ttruee"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!( + err, + "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`" + ); + env::remove_var(env_key); - let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; + let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; - // for valid testing - env::set_var(env_key, "4096"); - let config = ConfigOptions::from_env().unwrap(); - assert_eq!(config.execution.batch_size, 4096); + // for valid testing + env::set_var(env_key, "4096"); + let config = ConfigOptions::from_env().unwrap(); + assert_eq!(config.execution.batch_size, 4096); - // for invalid testing - env::set_var(env_key, "abc"); - let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string"); + // for invalid testing + env::set_var(env_key, "abc"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!( + err, + "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string" + ); - env::remove_var(env_key); - let config = ConfigOptions::from_env().unwrap(); - assert_eq!(config.execution.batch_size, 8192); // set to its default value + env::remove_var(env_key); + let config = ConfigOptions::from_env().unwrap(); + assert_eq!(config.execution.batch_size, 8192); // set to its default value + } } diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs index edcf039e4e704..bdbe72245323d 100644 --- a/datafusion/core/tests/core_integration.rs +++ b/datafusion/core/tests/core_integration.rs @@ -48,15 +48,15 @@ mod optimizer; /// Run all tests that are found in the `physical_optimizer` directory mod physical_optimizer; -/// Run all tests that are found in the `schema_adapter` directory -mod schema_adapter; - /// Run all tests that are found in the `serde` directory mod serde; /// Run all tests that are found in the `catalog` directory mod catalog; +/// Run all tests that are found in the `catalog_listing` directory +mod catalog_listing; + /// Run all tests that are found in the `tracing` directory mod tracing; diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index cbdc4a448ea41..7b6a3c5fbed75 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -28,11 +28,11 @@ use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::Result; use datafusion::execution::context::{SessionContext, TaskContext}; use datafusion::logical_expr::{ - col, Expr, LogicalPlan, LogicalPlanBuilder, TableScan, 
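The `unsafe` block wrapping the test above reflects that `std::env::set_var` / `remove_var` are unsafe in the 2024 edition, because they mutate process-global state that other threads may read concurrently. A minimal sketch:

    use std::env;

    fn main() {
        let key = "DATAFUSION_EXECUTION_BATCH_SIZE"; // same variable the test exercises
        unsafe { env::set_var(key, "4096") };
        assert_eq!(env::var(key).unwrap(), "4096");
        unsafe { env::remove_var(key) };
        assert!(env::var(key).is_err());
    }
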
UNNAMED_TABLE, + Expr, LogicalPlan, LogicalPlanBuilder, TableScan, UNNAMED_TABLE, col, }; use datafusion::physical_plan::{ - collect, ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - RecordBatchStream, SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, + RecordBatchStream, SendableRecordBatchStream, Statistics, collect, }; use datafusion::scalar::ScalarValue; use datafusion_catalog::Session; @@ -40,9 +40,9 @@ use datafusion_common::cast::as_primitive_array; use datafusion_common::project_schema; use datafusion_common::stats::Precision; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::PlanProperties; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::PlanProperties; use async_trait::async_trait; use futures::stream::Stream; @@ -316,6 +316,7 @@ async fn optimizers_catch_all_statistics() { assert_eq!(format!("{:?}", actual[0]), format!("{expected:?}")); } +#[expect(clippy::needless_pass_by_value)] fn contains_place_holder_exec(plan: Arc) -> bool { if plan.as_any().is::() { true diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index c80c0b4bf54ba..ca1eaa1f958ea 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -35,7 +35,7 @@ use datafusion::prelude::*; use datafusion::scalar::ScalarValue; use datafusion_catalog::Session; use datafusion_common::cast::as_primitive_array; -use datafusion_common::{internal_err, not_impl_err}; +use datafusion_common::{DataFusionError, internal_err, not_impl_err}; use datafusion_expr::expr::{BinaryExpr, Cast}; use datafusion_functions_aggregate::expr_fn::count; use datafusion_physical_expr::EquivalenceProperties; @@ -134,9 +134,19 @@ impl ExecutionPlan for CustomPlan { _partition: usize, _context: Arc, ) -> Result { + let schema_captured = self.schema().clone(); Ok(Box::pin(RecordBatchStreamAdapter::new( self.schema(), - futures::stream::iter(self.batches.clone().into_iter().map(Ok)), + futures::stream::iter(self.batches.clone().into_iter().map(move |batch| { + let projection: Vec = schema_captured + .fields() + .iter() + .filter_map(|field| batch.schema().index_of(field.name()).ok()) + .collect(); + batch + .project(&projection) + .map_err(|e| DataFusionError::ArrowError(Box::new(e), None)) + })), ))) } diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 403c04f1737e1..820c2a470b376 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -214,6 +214,7 @@ fn fully_defined() -> (Statistics, Schema) { min_value: Precision::Exact(ScalarValue::Int32(Some(-24))), sum_value: Precision::Exact(ScalarValue::Int64(Some(10))), null_count: Precision::Exact(0), + byte_size: Precision::Absent, }, ColumnStatistics { distinct_count: Precision::Exact(13), @@ -221,6 +222,7 @@ fn fully_defined() -> (Statistics, Schema) { min_value: Precision::Exact(ScalarValue::Int64(Some(-6783))), sum_value: Precision::Exact(ScalarValue::Int64(Some(10))), null_count: Precision::Exact(5), + byte_size: Precision::Absent, }, ], }, diff --git 
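`CustomPlan::execute` above now projects each batch onto the plan's declared schema by resolving field names to column indices. A self-contained sketch of that projection step using only the public Arrow API:

    use std::sync::Arc;

    use arrow::array::{Int32Array, StringArray};
    use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
    use arrow::error::ArrowError;
    use arrow::record_batch::RecordBatch;

    // Keep only the columns named by `target`, in `target`'s order.
    fn project_to(batch: &RecordBatch, target: &SchemaRef) -> Result<RecordBatch, ArrowError> {
        let projection: Vec<usize> = target
            .fields()
            .iter()
            .filter_map(|field| batch.schema().index_of(field.name()).ok())
            .collect();
        batch.project(&projection)
    }

    fn main() -> Result<(), ArrowError> {
        let batch = RecordBatch::try_new(
            Arc::new(Schema::new(vec![
                Field::new("a", DataType::Int32, false),
                Field::new("b", DataType::Utf8, false),
            ])),
            vec![
                Arc::new(Int32Array::from(vec![1, 2])),
                Arc::new(StringArray::from(vec!["x", "y"])),
            ],
        )?;
        let target: SchemaRef =
            Arc::new(Schema::new(vec![Field::new("b", DataType::Utf8, false)]));
        assert_eq!(project_to(&batch, &target)?.num_columns(), 1);
        Ok(())
    }
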
a/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow new file mode 100644 index 0000000000000..bad9e3de4a57f Binary files /dev/null and b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow differ diff --git a/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow new file mode 100644 index 0000000000000..4a07fbfa47f32 Binary files /dev/null and b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow differ diff --git a/datafusion/core/tests/data/recursive_cte/closure.csv b/datafusion/core/tests/data/recursive_cte/closure.csv new file mode 100644 index 0000000000000..a31e2bfbf36b6 --- /dev/null +++ b/datafusion/core/tests/data/recursive_cte/closure.csv @@ -0,0 +1,6 @@ +start,end +1,2 +2,3 +2,4 +2,4 +4,1 \ No newline at end of file diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 265862ff9af8a..014f356cd64cd 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{types::Int32Type, ListArray}; +use arrow::array::{ListArray, types::Int32Type}; use arrow::datatypes::SchemaRef; use arrow::datatypes::{DataType, Field, Schema}; use arrow::{ @@ -31,7 +31,7 @@ use datafusion::prelude::*; use datafusion_common::test_util::batches_to_string; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::expr::Alias; -use datafusion_expr::{table_scan, ExprSchemable, LogicalPlanBuilder}; +use datafusion_expr::{ExprSchemable, LogicalPlanBuilder, table_scan}; use datafusion_functions_aggregate::expr_fn::{approx_median, approx_percentile_cont}; use datafusion_functions_nested::map::map; use insta::assert_snapshot; @@ -313,10 +313,10 @@ async fn test_fn_arrow_typeof() -> Result<()> { +----------------------+ | arrow_typeof(test.l) | +----------------------+ - | List(nullable Int32) | - | List(nullable Int32) | - | List(nullable Int32) | - | List(nullable Int32) | + | List(Int32) | + | List(Int32) | + | List(Int32) | + | List(Int32) | +----------------------+ "); diff --git a/datafusion/core/tests/dataframe/describe.rs b/datafusion/core/tests/dataframe/describe.rs index 9bd69dfa72b4c..c61fe4fed1615 100644 --- a/datafusion/core/tests/dataframe/describe.rs +++ b/datafusion/core/tests/dataframe/describe.rs @@ -17,7 +17,7 @@ use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion_common::test_util::batches_to_string; -use datafusion_common::{test_util::parquet_test_data, Result}; +use datafusion_common::{Result, test_util::parquet_test_data}; use insta::assert_snapshot; #[tokio::test] diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 05f5a204c0963..c09db371912b0 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -20,10 +20,10 @@ mod dataframe_functions; mod describe; use arrow::array::{ - record_batch, Array, ArrayRef, BooleanArray, DictionaryArray, FixedSizeListArray, - FixedSizeListBuilder, Float32Array, Float64Array, Int32Array, Int32Builder, - Int8Array, LargeListArray, ListArray, ListBuilder, RecordBatch, StringArray, - StringBuilder, StructBuilder, 
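`ColumnStatistics` gains a `byte_size` field above, which every literal initializer now has to spell out. Where exhaustiveness is not needed, struct-update syntax can absorb future additions; a hedged sketch, assuming `ColumnStatistics::new_unknown()` keeps defaulting every field to `Precision::Absent`:

    use datafusion_common::ColumnStatistics;
    use datafusion_common::stats::Precision;

    fn main() {
        let stats = ColumnStatistics {
            null_count: Precision::Exact(0),
            ..ColumnStatistics::new_unknown()
        };
        // Fields not mentioned, including the new `byte_size`, stay Absent.
        assert!(matches!(stats.byte_size, Precision::Absent));
    }
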
UInt32Array, UInt32Builder, UnionArray, + Array, ArrayRef, BooleanArray, DictionaryArray, FixedSizeListArray, + FixedSizeListBuilder, Float32Array, Float64Array, Int8Array, Int32Array, + Int32Builder, LargeListArray, ListArray, ListBuilder, RecordBatch, StringArray, + StringBuilder, StructBuilder, UInt32Array, UInt32Builder, UnionArray, record_batch, }; use arrow::buffer::ScalarBuffer; use arrow::datatypes::{ @@ -61,13 +61,13 @@ use datafusion::prelude::{ }; use datafusion::test_util::{ parquet_test_data, populate_csv_partitions, register_aggregate_csv, test_table, - test_table_with_name, + test_table_with_cache_factory, test_table_with_name, }; use datafusion_catalog::TableProvider; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; use datafusion_common::{ - assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema, - DataFusionError, ScalarValue, TableReference, UnnestOptions, + Constraint, Constraints, DFSchema, DataFusionError, ScalarValue, SchemaError, + TableReference, UnnestOptions, assert_contains, internal_datafusion_err, }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; @@ -76,21 +76,21 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::expr::{GroupingSet, NullTreatment, Sort, WindowFunction}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ - cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, - scalar_subquery, when, wildcard, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, - LogicalPlanBuilder, ScalarFunctionImplementation, SortExpr, TableType, WindowFrame, - WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, LogicalPlanBuilder, + ScalarFunctionImplementation, SortExpr, TableType, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, cast, col, create_udf, exists, + in_subquery, lit, out_ref_col, placeholder, scalar_subquery, when, wildcard, }; +use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::empty::EmptyExec; -use datafusion_physical_plan::{displayable, ExecutionPlan, ExecutionPlanProperties}; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties, displayable}; use datafusion::error::Result as DataFusionResult; use datafusion_functions_window::expr_fn::lag; @@ -305,6 +305,27 @@ async fn select_columns() -> Result<()> { Ok(()) } +#[tokio::test] +async fn select_columns_with_nonexistent_columns() -> Result<()> { + let t = test_table().await?; + let t2 = t.select_columns(&["canada", "c2", "rocks"]); + + match t2 { + Err(DataFusionError::SchemaError(boxed_err, _)) => { + // Verify it's the first invalid column + match boxed_err.as_ref() { + SchemaError::FieldNotFound { field, .. 
} => { + assert_eq!(field.name(), "canada"); + } + _ => panic!("Expected SchemaError::FieldNotFound for 'canada'"), + } + } + _ => panic!("Expected SchemaError"), + } + + Ok(()) +} + #[tokio::test] async fn select_expr() -> Result<()> { // build plan using Table API @@ -392,14 +413,14 @@ async fn select_with_periods() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +------+ | f.c1 | +------+ | 1 | | 10 | +------+ - "### + " ); Ok(()) @@ -547,14 +568,14 @@ async fn drop_with_quotes() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r#" +------+ | f"c2 | +------+ | 11 | | 2 | +------+ - "### + "# ); Ok(()) @@ -579,14 +600,14 @@ async fn drop_with_periods() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +------+ | f.c2 | +------+ | 11 | | 2 | +------+ - "### + " ); Ok(()) @@ -723,23 +744,23 @@ async fn test_aggregate_with_pk() -> Result<()> { assert_snapshot!( physical_plan_to_string(&df).await, - @r###" + @r" AggregateExec: mode=Single, gby=[id@0 as id, name@1 as name], aggr=[] DataSourceExec: partitions=1, partition_sizes=[1] - "### + " ); let df_results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+------+ | id | name | +----+------+ | 1 | a | +----+------+ - "### + " ); Ok(()) @@ -766,9 +787,8 @@ async fn test_aggregate_with_pk2() -> Result<()> { physical_plan_to_string(&df).await, @r" AggregateExec: mode=Single, gby=[id@0 as id, name@1 as name], aggr=[], ordering_mode=Sorted - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: id@0 = 1 AND name@1 = a - DataSourceExec: partitions=1, partition_sizes=[1] + FilterExec: id@0 = 1 AND name@1 = a + DataSourceExec: partitions=1, partition_sizes=[1] " ); @@ -778,13 +798,13 @@ async fn test_aggregate_with_pk2() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+------+ | id | name | +----+------+ | 1 | a | +----+------+ - "### + " ); Ok(()) @@ -815,9 +835,8 @@ async fn test_aggregate_with_pk3() -> Result<()> { physical_plan_to_string(&df).await, @r" AggregateExec: mode=Single, gby=[id@0 as id, name@1 as name], aggr=[], ordering_mode=PartiallySorted([0]) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: id@0 = 1 - DataSourceExec: partitions=1, partition_sizes=[1] + FilterExec: id@0 = 1 + DataSourceExec: partitions=1, partition_sizes=[1] " ); @@ -827,13 +846,13 @@ async fn test_aggregate_with_pk3() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+------+ | id | name | +----+------+ | 1 | a | +----+------+ - "### + " ); Ok(()) @@ -866,9 +885,8 @@ async fn test_aggregate_with_pk4() -> Result<()> { physical_plan_to_string(&df).await, @r" AggregateExec: mode=Single, gby=[id@0 as id], aggr=[], ordering_mode=Sorted - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: id@0 = 1 - DataSourceExec: partitions=1, partition_sizes=[1] + FilterExec: id@0 = 1 + DataSourceExec: partitions=1, partition_sizes=[1] " ); @@ -876,13 +894,13 @@ async fn test_aggregate_with_pk4() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+ | id | +----+ | 1 | +----+ - "### + " ); Ok(()) @@ -904,7 +922,7 @@ async fn test_aggregate_alias() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+ | c2 | +----+ @@ -914,7 +932,7 @@ async fn test_aggregate_alias() -> Result<()> { | 5 | | 6 | +----+ - "### + " ); Ok(()) @@ -951,7 +969,7 @@ async fn 
test_aggregate_with_union() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+------------+ | c1 | sum_result | +----+------------+ @@ -961,7 +979,7 @@ async fn test_aggregate_with_union() -> Result<()> { | d | 126 | | e | 121 | +----+------------+ - "### + " ); Ok(()) } @@ -987,7 +1005,7 @@ async fn test_aggregate_subexpr() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----------------+------+ | c2 + Int32(10) | sum | +----------------+------+ @@ -997,7 +1015,7 @@ async fn test_aggregate_subexpr() -> Result<()> { | 15 | 95 | | 16 | -146 | +----------------+------+ - "### + " ); Ok(()) @@ -1020,7 +1038,7 @@ async fn test_aggregate_name_collision() -> Result<()> { // The select expr has the same display_name as the group_expr, // but since they are different expressions, it should fail. .expect_err("Expected error"); - assert_snapshot!(df.strip_backtrace(), @r###"Schema error: No field named aggregate_test_100.c2. Valid fields are "aggregate_test_100.c2 + aggregate_test_100.c3"."###); + assert_snapshot!(df.strip_backtrace(), @r#"Schema error: No field named aggregate_test_100.c2. Valid fields are "aggregate_test_100.c2 + aggregate_test_100.c3"."#); Ok(()) } @@ -1079,33 +1097,33 @@ async fn window_using_aggregates() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df), - @r###" + @r" +-------------+----------+-----------------+---------------+--------+-----+------+----+------+ | first_value | last_val | approx_distinct | approx_median | median | max | min | c2 | c3 | +-------------+----------+-----------------+---------------+--------+-----+------+----+------+ | | | | | | | | 1 | -85 | - | -85 | -101 | 14 | -12 | -101 | 83 | -101 | 4 | -54 | - | -85 | -101 | 17 | -25 | -101 | 83 | -101 | 5 | -31 | - | -85 | -12 | 10 | -32 | -12 | 83 | -85 | 3 | 13 | - | -85 | -25 | 3 | -56 | -25 | -25 | -85 | 1 | -5 | - | -85 | -31 | 18 | -29 | -31 | 83 | -101 | 5 | 36 | - | -85 | -38 | 16 | -25 | -38 | 83 | -101 | 4 | 65 | + | -85 | -101 | 14 | -12 | -12 | 83 | -101 | 4 | -54 | + | -85 | -101 | 17 | -25 | -25 | 83 | -101 | 5 | -31 | + | -85 | -12 | 10 | -32 | -34 | 83 | -85 | 3 | 13 | + | -85 | -25 | 3 | -56 | -56 | -25 | -85 | 1 | -5 | + | -85 | -31 | 18 | -29 | -28 | 83 | -101 | 5 | 36 | + | -85 | -38 | 16 | -25 | -25 | 83 | -101 | 4 | 65 | | -85 | -43 | 7 | -43 | -43 | 83 | -85 | 2 | 45 | - | -85 | -48 | 6 | -35 | -48 | 83 | -85 | 2 | -43 | - | -85 | -5 | 4 | -37 | -5 | -5 | -85 | 1 | 83 | - | -85 | -54 | 15 | -17 | -54 | 83 | -101 | 4 | -38 | - | -85 | -56 | 2 | -70 | -56 | -56 | -85 | 1 | -25 | - | -85 | -72 | 9 | -43 | -72 | 83 | -85 | 3 | -12 | + | -85 | -48 | 6 | -35 | -36 | 83 | -85 | 2 | -43 | + | -85 | -5 | 4 | -37 | -40 | -5 | -85 | 1 | 83 | + | -85 | -54 | 15 | -17 | -18 | 83 | -101 | 4 | -38 | + | -85 | -56 | 2 | -70 | -70 | -56 | -85 | 1 | -25 | + | -85 | -72 | 9 | -43 | -43 | 83 | -85 | 3 | -12 | | -85 | -85 | 1 | -85 | -85 | -85 | -85 | 1 | -56 | - | -85 | 13 | 11 | -17 | 13 | 83 | -85 | 3 | 14 | - | -85 | 13 | 11 | -25 | 13 | 83 | -85 | 3 | 13 | - | -85 | 14 | 12 | -12 | 14 | 83 | -85 | 3 | 17 | - | -85 | 17 | 13 | -11 | 17 | 83 | -85 | 4 | -101 | - | -85 | 45 | 8 | -34 | 45 | 83 | -85 | 3 | -72 | - | -85 | 65 | 17 | -17 | 65 | 83 | -101 | 5 | -101 | - | -85 | 83 | 5 | -25 | 83 | 83 | -85 | 2 | -48 | + | -85 | 13 | 11 | -17 | -18 | 83 | -85 | 3 | 14 | + | -85 | 13 | 11 | -25 | -25 | 83 | -85 | 3 | 13 | + | -85 | 14 | 12 | -12 | -12 | 83 | -85 | 3 | 17 | + | -85 | 17 | 13 | -11 | -8 | 83 | -85 | 4 | 
-101 | + | -85 | 45 | 8 | -34 | -34 | 83 | -85 | 3 | -72 | + | -85 | 65 | 17 | -17 | -18 | 83 | -101 | 5 | -101 | + | -85 | 83 | 5 | -25 | -25 | 83 | -85 | 2 | -48 | +-------------+----------+-----------------+---------------+--------+-----+------+----+------+ - "### + " ); Ok(()) @@ -1172,7 +1190,7 @@ async fn window_aggregates_with_filter() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +---------+---------+---------+---------+---------+----+-----+ | sum_pos | avg_pos | min_pos | max_pos | cnt_pos | ts | val | +---------+---------+---------+---------+---------+----+-----+ @@ -1182,7 +1200,7 @@ async fn window_aggregates_with_filter() -> Result<()> { | 5 | 2.5 | 1 | 4 | 2 | 4 | 4 | | 5 | 2.5 | 1 | 4 | 2 | 5 | -1 | +---------+---------+---------+---------+---------+----+-----+ - "### + " ); Ok(()) @@ -1238,7 +1256,7 @@ async fn test_distinct_sort_by() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+ | c1 | +----+ @@ -1248,7 +1266,7 @@ async fn test_distinct_sort_by() -> Result<()> { | d | | e | +----+ - "### + " ); Ok(()) @@ -1286,7 +1304,7 @@ async fn test_distinct_on() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+ | c1 | +----+ @@ -1296,7 +1314,7 @@ async fn test_distinct_on() -> Result<()> { | d | | e | +----+ - "### + " ); Ok(()) @@ -1321,7 +1339,7 @@ async fn test_distinct_on_sort_by() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+ | c1 | +----+ @@ -1331,7 +1349,7 @@ async fn test_distinct_on_sort_by() -> Result<()> { | d | | e | +----+ - "### + " ); Ok(()) @@ -1395,13 +1413,13 @@ async fn join_coercion_unnamed() -> Result<()> { assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----+------+ | id | name | +----+------+ | 10 | d | +----+------+ - "### + " ); Ok(()) } @@ -1420,13 +1438,13 @@ async fn join_on() -> Result<()> { [col("a.c1").not_eq(col("b.c1")), col("a.c2").eq(col("b.c2"))], )?; - assert_snapshot!(join.logical_plan(), @r###" + assert_snapshot!(join.logical_plan(), @r" Inner Join: Filter: a.c1 != b.c1 AND a.c2 = b.c2 Projection: a.c1, a.c2 TableScan: a Projection: b.c1, b.c2 TableScan: b - "###); + "); Ok(()) } @@ -1449,7 +1467,11 @@ async fn join_on_filter_datatype() -> Result<()> { let err = join.into_optimized_plan().unwrap_err(); assert_snapshot!( err.strip_backtrace(), - @"type_coercion\ncaused by\nError during planning: Join condition must be boolean type, but got Utf8" + @r" + type_coercion + caused by + Error during planning: Join condition must be boolean type, but got Utf8 + " ); Ok(()) } @@ -1627,7 +1649,9 @@ async fn register_table() -> Result<()> { let df_impl = DataFrame::new(ctx.state(), df.logical_plan().clone()); // register a dataframe as a table - ctx.register_table("test_table", df_impl.clone().into_view())?; + let table_provider = df_impl.clone().into_view(); + assert_eq!(table_provider.table_type(), TableType::View); + ctx.register_table("test_table", table_provider)?; // pull the table out let table = ctx.table("test_table").await?; @@ -1644,7 +1668,7 @@ async fn register_table() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+-----------------------------+ | c1 | sum(aggregate_test_100.c12) | +----+-----------------------------+ @@ -1654,13 +1678,13 @@ async fn register_table() -> Result<()> { | d | 8.793968289758968 | | e | 10.206140546981722 | +----+-----------------------------+ - "### + " ); // the results are the same as 
the results from the view, modulo the leaf table name assert_snapshot!( batches_to_sort_string(table_results), - @r###" + @r" +----+---------------------+ | c1 | sum(test_table.c12) | +----+---------------------+ @@ -1670,7 +1694,7 @@ async fn register_table() -> Result<()> { | d | 8.793968289758968 | | e | 10.206140546981722 | +----+---------------------+ - "### + " ); Ok(()) } @@ -1719,7 +1743,7 @@ async fn with_column() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+----+-----+-----+ | c1 | c2 | c3 | sum | +----+----+-----+-----+ @@ -1730,7 +1754,7 @@ async fn with_column() -> Result<()> { | a | 3 | 14 | 17 | | a | 3 | 17 | 20 | +----+----+-----+-----+ - "### + " ); // check that col with the same name overwritten @@ -1742,7 +1766,7 @@ async fn with_column() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results_overwrite), - @r###" + @r" +-----+----+-----+-----+ | c1 | c2 | c3 | sum | +-----+----+-----+-----+ @@ -1753,7 +1777,7 @@ async fn with_column() -> Result<()> { | 17 | 3 | 14 | 17 | | 20 | 3 | 17 | 20 | +-----+----+-----+-----+ - "### + " ); // check that col with the same name overwritten using same name as reference @@ -1765,7 +1789,7 @@ async fn with_column() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results_overwrite_self), - @r###" + @r" +----+----+-----+-----+ | c1 | c2 | c3 | sum | +----+----+-----+-----+ @@ -1776,7 +1800,7 @@ async fn with_column() -> Result<()> { | a | 4 | 14 | 17 | | a | 4 | 17 | 20 | +----+----+-----+-----+ - "### + " ); Ok(()) @@ -1804,14 +1828,14 @@ async fn test_window_function_with_column() -> Result<()> { let df_results = df.clone().collect().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+----+-----+-----+---+ | c1 | c2 | c3 | s | r | +----+----+-----+-----+---+ | c | 2 | 1 | 3 | 1 | | d | 5 | -40 | -35 | 2 | +----+----+-----+-----+---+ - "### + " ); Ok(()) @@ -1846,13 +1870,13 @@ async fn with_column_join_same_columns() -> Result<()> { let df_results = df.clone().collect().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+----+ | c1 | c1 | +----+----+ | a | a | +----+----+ - "### + " ); let df_with_column = df.clone().with_column("new_column", lit(true))?; @@ -1875,7 +1899,7 @@ async fn with_column_join_same_columns() -> Result<()> { assert_snapshot!( df_with_column.clone().into_optimized_plan().unwrap(), - @r###" + @r" Projection: t1.c1, t2.c1, Boolean(true) AS new_column Sort: t1.c1 ASC NULLS FIRST, fetch=1 Inner Join: t1.c1 = t2.c1 @@ -1883,20 +1907,20 @@ async fn with_column_join_same_columns() -> Result<()> { TableScan: aggregate_test_100 projection=[c1] SubqueryAlias: t2 TableScan: aggregate_test_100 projection=[c1] - "### + " ); let df_results = df_with_column.collect().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+----+------------+ | c1 | c1 | new_column | +----+----+------------+ | a | a | true | +----+----+------------+ - "### + " ); Ok(()) @@ -1946,13 +1970,13 @@ async fn with_column_renamed() -> Result<()> { assert_snapshot!( batches_to_sort_string(batches), - @r###" + @r" +-----+-----+-----+-------+ | one | two | c3 | total | +-----+-----+-----+-------+ | a | 3 | -72 | -69 | +-----+-----+-----+-------+ - "### + " ); Ok(()) @@ -2017,13 +2041,13 @@ async fn with_column_renamed_join() -> Result<()> { let df_results = df.clone().collect().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+----+-----+----+----+-----+ | 
c1 | c2 | c3 | c1 | c2 | c3 | +----+----+-----+----+----+-----+ | a | 1 | -85 | a | 1 | -85 | +----+----+-----+----+----+-----+ - "### + " ); let df_renamed = df.clone().with_column_renamed("t1.c1", "AAA")?; @@ -2046,7 +2070,7 @@ async fn with_column_renamed_join() -> Result<()> { assert_snapshot!( df_renamed.clone().into_optimized_plan().unwrap(), - @r###" + @r" Projection: t1.c1 AS AAA, t1.c2, t1.c3, t2.c1, t2.c2, t2.c3 Sort: t1.c1 ASC NULLS FIRST, t1.c2 ASC NULLS FIRST, t1.c3 ASC NULLS FIRST, t2.c1 ASC NULLS FIRST, t2.c2 ASC NULLS FIRST, t2.c3 ASC NULLS FIRST, fetch=1 Inner Join: t1.c1 = t2.c1 @@ -2054,20 +2078,20 @@ async fn with_column_renamed_join() -> Result<()> { TableScan: aggregate_test_100 projection=[c1, c2, c3] SubqueryAlias: t2 TableScan: aggregate_test_100 projection=[c1, c2, c3] - "### + " ); let df_results = df_renamed.collect().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +-----+----+-----+----+----+-----+ | AAA | c2 | c3 | c1 | c2 | c3 | +-----+----+-----+----+----+-----+ | a | 1 | -85 | a | 1 | -85 | +-----+----+-----+----+----+-----+ - "### + " ); Ok(()) @@ -2102,13 +2126,13 @@ async fn with_column_renamed_case_sensitive() -> Result<()> { assert_snapshot!( batches_to_sort_string(res), - @r###" + @r" +---------+ | CoLuMn1 | +---------+ | a | +---------+ - "### + " ); let df_renamed = df_renamed @@ -2118,13 +2142,13 @@ async fn with_column_renamed_case_sensitive() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_renamed), - @r###" + @r" +----+ | c1 | +----+ | a | +----+ - "### + " ); Ok(()) @@ -2162,19 +2186,19 @@ async fn describe_lookup_via_quoted_identifier() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&describe_result.clone().collect().await?), - @r###" - +------------+--------------+ - | describe | CoLu.Mn["1"] | - +------------+--------------+ - | count | 1 | - | max | a | - | mean | null | - | median | null | - | min | a | - | null_count | 0 | - | std | null | - +------------+--------------+ - "### + @r#" + +------------+--------------+ + | describe | CoLu.Mn["1"] | + +------------+--------------+ + | count | 1 | + | max | a | + | mean | null | + | median | null | + | min | a | + | null_count | 0 | + | std | null | + +------------+--------------+ + "# ); Ok(()) @@ -2192,13 +2216,13 @@ async fn cast_expr_test() -> Result<()> { df.clone().show().await?; assert_snapshot!( batches_to_sort_string(&df_results), - @r###" + @r" +----+----+-----+ | c2 | c3 | sum | +----+----+-----+ | 2 | 1 | 3 | +----+----+-----+ - "### + " ); Ok(()) @@ -2214,12 +2238,14 @@ async fn row_writer_resize_test() -> Result<()> { let data = RecordBatch::try_new( schema, - vec![ - Arc::new(StringArray::from(vec![ - Some("2a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"), - Some("3a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800"), - ])) - ], + vec![Arc::new(StringArray::from(vec![ + Some( + "2a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + ), + Some( + "3a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800", + ), + ]))], )?; let ctx = SessionContext::new(); @@ -2258,14 +2284,14 @@ async fn with_column_name() -> Result<()> { assert_snapshot!( 
batches_to_sort_string(&df_results), - @r###" + @r" +------+-------+ | f.c1 | f.c2 | +------+-------+ | 1 | hello | | 10 | hello | +------+-------+ - "### + " ); Ok(()) @@ -2301,13 +2327,13 @@ async fn cache_test() -> Result<()> { let cached_df_results = cached_df.collect().await?; assert_snapshot!( batches_to_sort_string(&cached_df_results), - @r###" + @r" +----+----+-----+ | c2 | c3 | sum | +----+----+-----+ | 2 | 1 | 3 | +----+----+-----+ - "### + " ); assert_eq!(&df_results, &cached_df_results); @@ -2315,6 +2341,29 @@ async fn cache_test() -> Result<()> { Ok(()) } +#[tokio::test] +async fn cache_producer_test() -> Result<()> { + let df = test_table_with_cache_factory() + .await? + .select_columns(&["c2", "c3"])? + .limit(0, Some(1))? + .with_column("sum", cast(col("c2") + col("c3"), DataType::Int64))?; + + let cached_df = df.clone().cache().await?; + + assert_snapshot!( + cached_df.clone().into_optimized_plan().unwrap(), + @r" + CacheNode + Projection: aggregate_test_100.c2, aggregate_test_100.c3, CAST(CAST(aggregate_test_100.c2 AS Int64) + CAST(aggregate_test_100.c3 AS Int64) AS Int64) AS sum + Projection: aggregate_test_100.c2, aggregate_test_100.c3 + Limit: skip=0, fetch=1 + TableScan: aggregate_test_100, fetch=1 + " + ); + Ok(()) +} + #[tokio::test] async fn partition_aware_union() -> Result<()> { let left = test_table().await?.select_columns(&["c1", "c2"])?; @@ -2584,13 +2633,13 @@ async fn filtered_aggr_with_param_values() -> Result<()> { let df_results = df?.collect().await?; assert_snapshot!( batches_to_string(&df_results), - @r###" + @r" +------------------------------------------------+ | count(table1.c2) FILTER (WHERE table1.c3 > $1) | +------------------------------------------------+ | 54 | +------------------------------------------------+ - "### + " ); Ok(()) @@ -2638,7 +2687,7 @@ async fn write_parquet_with_order() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +---+---+ | a | b | +---+---+ @@ -2648,7 +2697,7 @@ async fn write_parquet_with_order() -> Result<()> { | 5 | 3 | | 7 | 4 | +---+---+ - "### + " ); Ok(()) @@ -2696,7 +2745,7 @@ async fn write_csv_with_order() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +---+---+ | a | b | +---+---+ @@ -2706,7 +2755,7 @@ async fn write_csv_with_order() -> Result<()> { | 5 | 3 | | 7 | 4 | +---+---+ - "### + " ); Ok(()) } @@ -2753,7 +2802,7 @@ async fn write_json_with_order() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +---+---+ | a | b | +---+---+ @@ -2763,7 +2812,7 @@ async fn write_json_with_order() -> Result<()> { | 5 | 3 | | 7 | 4 | +---+---+ - "### + " ); Ok(()) } @@ -2812,7 +2861,7 @@ async fn write_table_with_order() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----------+ | tablecol1 | +-----------+ @@ -2822,7 +2871,7 @@ async fn write_table_with_order() -> Result<()> { | x | | z | +-----------+ - "### + " ); Ok(()) } @@ -2849,7 +2898,7 @@ async fn test_count_wildcard_on_sort() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), - @r###" + @r" +---------------+------------------------------------------------------------------------------------------------------------+ | plan_type | plan | +---------------+------------------------------------------------------------------------------------------------------------+ @@ -2863,36 +2912,32 @@ async fn test_count_wildcard_on_sort() -> Result<()> { | | SortExec: expr=[count(*)@1 ASC NULLS LAST], 
preserve_partitioning=[true] | | | ProjectionExec: expr=[b@0 as b, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] | | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(Int64(1))] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+------------------------------------------------------------------------------------------------------------+ - "### + " ); assert_snapshot!( pretty_format_batches(&df_results).unwrap(), - @r###" - +---------------+--------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+--------------------------------------------------------------------------------+ - | logical_plan | Sort: count(*) ASC NULLS LAST | - | | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] | - | | TableScan: t1 projection=[b] | - | physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] | - | | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] | - | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+--------------------------------------------------------------------------------+ - "### + @r" + +---------------+----------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------+ + | logical_plan | Sort: count(*) ASC NULLS LAST | + | | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] | + | | TableScan: t1 projection=[b] | + | physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] | + | | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] | + | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] | + | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------+ + " ); Ok(()) } @@ -2910,23 +2955,22 @@ async fn test_count_wildcard_on_where_in() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), @r" - +---------------+------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | LeftSemi Join: CAST(t1.a AS Int64) = __correlated_sq_1.count(*) | - | | TableScan: t1 
projection=[a, b] | - | | SubqueryAlias: __correlated_sq_1 | - | | Projection: count(Int64(1)) AS count(*) | - | | Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] | - | | TableScan: t2 projection=[] | - | physical_plan | CoalesceBatchesExec: target_batch_size=8192 | - | | HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(count(*)@0, CAST(t1.a AS Int64)@2)], projection=[a@0, b@1] | - | | ProjectionExec: expr=[4 as count(*)] | - | | PlaceholderRowExec | - | | ProjectionExec: expr=[a@0 as a, b@1 as b, CAST(a@0 AS Int64) as CAST(t1.a AS Int64)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------+ + | logical_plan | LeftSemi Join: CAST(t1.a AS Int64) = __correlated_sq_1.count(*) | + | | TableScan: t1 projection=[a, b] | + | | SubqueryAlias: __correlated_sq_1 | + | | Projection: count(Int64(1)) AS count(*) | + | | Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] | + | | TableScan: t2 projection=[] | + | physical_plan | HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(count(*)@0, CAST(t1.a AS Int64)@2)], projection=[a@0, b@1] | + | | ProjectionExec: expr=[4 as count(*)] | + | | PlaceholderRowExec | + | | ProjectionExec: expr=[a@0 as a, b@1 as b, CAST(a@0 AS Int64) as CAST(t1.a AS Int64)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------+ " ); @@ -2956,22 +3000,21 @@ async fn test_count_wildcard_on_where_in() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), @r" - +---------------+------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | LeftSemi Join: CAST(t1.a AS Int64) = __correlated_sq_1.count(*) | - | | TableScan: t1 projection=[a, b] | - | | SubqueryAlias: __correlated_sq_1 | - | | Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] | - | | TableScan: t2 projection=[] | - | physical_plan | CoalesceBatchesExec: target_batch_size=8192 | - | | HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(count(*)@0, CAST(t1.a AS Int64)@2)], projection=[a@0, b@1] | - | | ProjectionExec: expr=[4 as count(*)] | - | | PlaceholderRowExec | - | | ProjectionExec: expr=[a@0 as a, b@1 as b, CAST(a@0 AS Int64) as CAST(t1.a AS Int64)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------+ + | logical_plan | LeftSemi Join: CAST(t1.a AS Int64) = 
__correlated_sq_1.count(*) | + | | TableScan: t1 projection=[a, b] | + | | SubqueryAlias: __correlated_sq_1 | + | | Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] | + | | TableScan: t2 projection=[] | + | physical_plan | HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(count(*)@0, CAST(t1.a AS Int64)@2)], projection=[a@0, b@1] | + | | ProjectionExec: expr=[4 as count(*)] | + | | PlaceholderRowExec | + | | ProjectionExec: expr=[a@0 as a, b@1 as b, CAST(a@0 AS Int64) as CAST(t1.a AS Int64)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------+ " ); @@ -3077,15 +3120,17 @@ async fn test_count_wildcard_on_window() -> Result<()> { let df_results = ctx .table("t1") .await? - .select(vec![count_all_window() - .order_by(vec![Sort::new(col("a"), false, true)]) - .window_frame(WindowFrame::new_bounds( - WindowFrameUnits::Range, - WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), - WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), - )) - .build() - .unwrap()])? + .select(vec![ + count_all_window() + .order_by(vec![Sort::new(col("a"), false, true)]) + .window_frame(WindowFrame::new_bounds( + WindowFrameUnits::Range, + WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), + WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), + )) + .build() + .unwrap(), + ])? .explain(false, false)? .collect() .await?; @@ -3113,30 +3158,29 @@ async fn test_count_wildcard_on_window() -> Result<()> { #[tokio::test] // Test with `repartition_sorts` disabled, causing a full resort of the data -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, - @r#" + @r" AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted - SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] - UnionExec - DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + SortPreservingMergeExec: [id@0 ASC NULLS LAST] + AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted + UnionExec + DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet - "#); + "); Ok(()) } -#[ignore] // See https://github.com/apache/datafusion/issues/18380 #[tokio::test] // Test with `repartition_sorts` enabled to preserve pre-sorted partitions and avoid resorting -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, - @r#" + @r" AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted 
SortPreservingMergeExec: [id@0 ASC NULLS LAST] AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted @@ -3144,53 +3188,7 @@ async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_reparti DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet - "#); - - // 💥 Doesn't pass, and generates this plan: - // - // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted - // SortPreservingMergeExec: [id@0 ASC NULLS LAST] - // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] - // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] - // UnionExec - // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet - // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet - // - // - // === Excerpt from the verbose explain === - // - // +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - // | plan_type | plan | - // +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - // | initial_physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | - // | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | - // | | UnionExec | - // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | - // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | - // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | - // ... 
- // | physical_plan after EnforceDistribution | OutputRequirementExec: order_by=[], dist_by=Unspecified | - // | | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | - // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | - // | | CoalescePartitionsExec | - // | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | - // | | UnionExec | - // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | - // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | - // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | - // | | | - // | physical_plan after CombinePartialFinalAggregate | SAME TEXT AS ABOVE - // | | | - // | physical_plan after EnforceSorting | OutputRequirementExec: order_by=[], dist_by=Unspecified | - // | | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | - // | | SortPreservingMergeExec: [id@0 ASC NULLS LAST] | - // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] | - // | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] | - // | | UnionExec | - // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | - // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | - // ... - // +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + "); Ok(()) } @@ -3275,7 +3273,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), - @r###" + @r" +---------------+-----------------------------------------------------+ | plan_type | plan | +---------------+-----------------------------------------------------+ @@ -3286,7 +3284,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> { | | PlaceholderRowExec | | | | +---------------+-----------------------------------------------------+ - "### + " ); // add `.select(vec![count_wildcard()])?` to make sure we can analyze all node instead of just top node. 
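// Editor's note, not part of this diff: the hunks above and below only adjust inline-snapshot
// formatting and expected plans for the `count(*)` DataFrame tests. Below is a minimal,
// self-contained sketch of the DataFrame-API pattern they exercise. The table `t1` and column
// `b` mirror the fixtures these tests register, but the row values here are illustrative
// assumptions only, not taken from this diff.
async fn count_wildcard_sketch() -> datafusion::error::Result<()> {
    use datafusion::arrow::array::{ArrayRef, Int32Array};
    use datafusion::arrow::record_batch::RecordBatch;
    use datafusion::functions_aggregate::expr_fn::count;
    use datafusion::prelude::*;
    use std::sync::Arc;

    let ctx = SessionContext::new();
    // Illustrative data only; the real tests build their own fixture tables.
    let batch = RecordBatch::try_from_iter(vec![(
        "b",
        Arc::new(Int32Array::from(vec![1, 1, 2])) as ArrayRef,
    )])?;
    let _ = ctx.register_batch("t1", batch)?;

    let df = ctx
        .table("t1")
        .await?
        // `count(lit(1))` is the long-hand spelling of `count(*)`, matching the
        // `count(Int64(1)) AS count(*)` seen in the logical plans above
        .aggregate(vec![col("b")], vec![count(lit(1)).alias("count(*)")])?
        // re-selecting the aggregate output (as the comment above suggests) makes the
        // analyzer walk every node rather than only the top-level projection
        .select(vec![col("b"), col("count(*)")])?;
    df.show().await?;
    Ok(())
}
// A sketch like this could be driven by `#[tokio::test]`, as the surrounding tests are.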
@@ -3301,7 +3299,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), - @r###" + @r" +---------------+---------------------------------------------------------------+ | plan_type | plan | +---------------+---------------------------------------------------------------+ @@ -3311,7 +3309,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> { | | PlaceholderRowExec | | | | +---------------+---------------------------------------------------------------+ - "### + " ); Ok(()) @@ -3331,32 +3329,31 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), @r" - +---------------+---------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+---------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: t1.a, t1.b | - | | Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0) | - | | Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true | - | | Left Join: t1.a = __scalar_sq_1.a | - | | TableScan: t1 projection=[a, b] | - | | SubqueryAlias: __scalar_sq_1 | - | | Projection: count(Int64(1)) AS count(*), t2.a, Boolean(true) AS __always_true | - | | Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1))]] | - | | TableScan: t2 projection=[a] | - | physical_plan | CoalesceBatchesExec: target_batch_size=8192 | - | | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@1)], projection=[a@0, b@1, count(*)@2, __always_true@4] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true] | - | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+---------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: t1.a, t1.b | + | | Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0) | + | | Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true | + | | Left Join: t1.a = __scalar_sq_1.a | + | | TableScan: t1 projection=[a, b] | + | | SubqueryAlias: __scalar_sq_1 | + | | Projection: count(Int64(1)) AS count(*), t2.a, Boolean(true) AS __always_true | + | | Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1))]] | + | | TableScan: t2 projection=[a] | + | physical_plan | 
FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1] | + | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | + | | ProjectionExec: expr=[a@2 as a, b@3 as b, count(*)@0 as count(*), __always_true@1 as __always_true] | + | | HashJoinExec: mode=CollectLeft, join_type=Right, on=[(a@1, a@0)], projection=[count(*)@0, __always_true@2, a@3, b@4] | + | | CoalescePartitionsExec | + | | ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true] | + | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))] | + | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------+ " ); @@ -3388,32 +3385,31 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), @r" - +---------------+---------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+---------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: t1.a, t1.b | - | | Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0) | - | | Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true | - | | Left Join: t1.a = __scalar_sq_1.a | - | | TableScan: t1 projection=[a, b] | - | | SubqueryAlias: __scalar_sq_1 | - | | Projection: count(*), t2.a, Boolean(true) AS __always_true | - | | Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1)) AS count(*)]] | - | | TableScan: t2 projection=[a] | - | physical_plan | CoalesceBatchesExec: target_batch_size=8192 | - | | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@1)], projection=[a@0, b@1, count(*)@2, __always_true@4] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true] | - | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+---------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: t1.a, t1.b | + | | Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE 
__scalar_sq_1.count(*) END > Int64(0) | + | | Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true | + | | Left Join: t1.a = __scalar_sq_1.a | + | | TableScan: t1 projection=[a, b] | + | | SubqueryAlias: __scalar_sq_1 | + | | Projection: count(*), t2.a, Boolean(true) AS __always_true | + | | Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1)) AS count(*)]] | + | | TableScan: t2 projection=[a] | + | physical_plan | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1] | + | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | + | | ProjectionExec: expr=[a@2 as a, b@3 as b, count(*)@0 as count(*), __always_true@1 as __always_true] | + | | HashJoinExec: mode=CollectLeft, join_type=Right, on=[(a@1, a@0)], projection=[count(*)@0, __always_true@2, a@3, b@4] | + | | CoalescePartitionsExec | + | | ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true] | + | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)] | + | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------+ " ); @@ -3498,7 +3494,7 @@ async fn sort_on_unprojected_columns() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----+ | a | +-----+ @@ -3507,7 +3503,7 @@ async fn sort_on_unprojected_columns() -> Result<()> { | 10 | | 1 | +-----+ - "### + " ); Ok(()) @@ -3545,7 +3541,7 @@ async fn sort_on_distinct_columns() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----+ | a | +-----+ @@ -3553,7 +3549,7 @@ async fn sort_on_distinct_columns() -> Result<()> { | 10 | | 1 | +-----+ - "### + " ); Ok(()) } @@ -3684,14 +3680,14 @@ async fn filter_with_alias_overwrite() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------+ | a | +------+ | true | | true | +------+ - "### + " ); Ok(()) @@ -3720,7 +3716,7 @@ async fn select_with_alias_overwrite() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-------+ | a | +-------+ @@ -3729,7 +3725,7 @@ async fn select_with_alias_overwrite() -> Result<()> { | true | | false | +-------+ - "### + " ); Ok(()) @@ -3755,7 +3751,7 @@ async fn test_grouping_sets() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----------+-----+---------------+ | a | b | count(test.a) | +-----------+-----+---------------+ @@ -3771,7 +3767,7 @@ async fn test_grouping_sets() -> Result<()> { | 123AbcDef | | 1 | | 123AbcDef | 100 | 1 | +-----------+-----+---------------+ - "### + " ); Ok(()) @@ -3798,7 +3794,7 @@ async fn test_grouping_sets_count() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +----+----+-----------------+ | c1 | c2 | count(Int32(1)) | +----+----+-----------------+ @@ -3813,7 +3809,7 @@ async fn test_grouping_sets_count() -> Result<()> { | b | | 19 | | a | | 21 | +----+----+-----------------+ - "### + " ); Ok(()) @@ -3847,7 +3843,7 @@ async fn test_grouping_set_array_agg_with_overflow() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +----+----+--------+---------------------+ | c1 | c2 | sum_c3 | avg_c3 | 
+----+----+--------+---------------------+ @@ -3887,7 +3883,7 @@ async fn test_grouping_set_array_agg_with_overflow() -> Result<()> { | a | 2 | -46 | -15.333333333333334 | | a | 1 | -88 | -17.6 | +----+----+--------+---------------------+ - "### + " ); Ok(()) @@ -3924,25 +3920,25 @@ async fn join_with_alias_filter() -> Result<()> { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Projection: t1.a, t2.a, t1.b, t1.c, t2.b, t2.c [a:UInt32, a:UInt32, b:Utf8, c:Int32, b:Utf8, c:Int32] Inner Join: t1.a + UInt32(3) = t2.a + UInt32(1) [a:UInt32, b:Utf8, c:Int32, a:UInt32, b:Utf8, c:Int32] TableScan: t1 projection=[a, b, c] [a:UInt32, b:Utf8, c:Int32] TableScan: t2 projection=[a, b, c] [a:UInt32, b:Utf8, c:Int32] - "### + " ); let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----+----+---+----+---+---+ | a | a | b | c | b | c | +----+----+---+----+---+---+ | 1 | 3 | a | 10 | a | 1 | | 11 | 13 | c | 30 | c | 3 | +----+----+---+----+---+---+ - "### + " ); Ok(()) @@ -3969,27 +3965,27 @@ async fn right_semi_with_alias_filter() -> Result<()> { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" RightSemi Join: t1.a = t2.a [a:UInt32, b:Utf8, c:Int32] Projection: t1.a [a:UInt32] Filter: t1.c > Int32(1) [a:UInt32, c:Int32] TableScan: t1 projection=[a, c] [a:UInt32, c:Int32] Filter: t2.c > Int32(1) [a:UInt32, b:Utf8, c:Int32] TableScan: t2 projection=[a, b, c] [a:UInt32, b:Utf8, c:Int32] - "### + " ); let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +-----+---+---+ | a | b | c | +-----+---+---+ | 10 | b | 2 | | 100 | d | 4 | +-----+---+---+ - "### + " ); Ok(()) @@ -4016,26 +4012,26 @@ async fn right_anti_filter_push_down() -> Result<()> { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" RightAnti Join: t1.a = t2.a Filter: t2.c > Int32(1) [a:UInt32, b:Utf8, c:Int32] Projection: t1.a [a:UInt32] Filter: t1.c > Int32(1) [a:UInt32, c:Int32] TableScan: t1 projection=[a, c] [a:UInt32, c:Int32] TableScan: t2 projection=[a, b, c] [a:UInt32, b:Utf8, c:Int32] - "### + " ); let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----+---+---+ | a | b | c | +----+---+---+ | 13 | c | 3 | | 3 | a | 1 | +----+---+---+ - "### + " ); Ok(()) @@ -4048,37 +4044,37 @@ async fn unnest_columns() -> Result<()> { let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" - +----------+---------------------------------+--------------------------+ - | shape_id | points | tags | - +----------+---------------------------------+--------------------------+ - | 1 | [{x: 5, y: -8}, {x: -3, y: -4}] | [tag1] | - | 2 | [{x: 6, y: 2}, {x: -2, y: -8}] | [tag1] | - | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | [tag1, tag2, tag3, tag4] | - | 4 | | [tag1, tag2, tag3] | - +----------+---------------------------------+--------------------------+ - "###); + @r" + +----------+---------------------------------+--------------------------+ + | shape_id | points | tags | + +----------+---------------------------------+--------------------------+ + | 1 | [{x: 5, y: -8}, {x: -3, y: -4}] | [tag1] | + | 2 | [{x: 6, y: 2}, {x: -2, y: -8}] | [tag1] | + | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | [tag1, tag2, tag3, tag4] | + | 4 | | [tag1, tag2, tag3] | + +----------+---------------------------------+--------------------------+ + "); // Unnest tags let df = table_with_nested_types(NUM_ROWS).await?; let results = 
df.unnest_columns(&["tags"])?.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" - +----------+---------------------------------+------+ - | shape_id | points | tags | - +----------+---------------------------------+------+ - | 1 | [{x: 5, y: -8}, {x: -3, y: -4}] | tag1 | - | 2 | [{x: 6, y: 2}, {x: -2, y: -8}] | tag1 | - | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag1 | - | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag2 | - | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag3 | - | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag4 | - | 4 | | tag1 | - | 4 | | tag2 | - | 4 | | tag3 | - +----------+---------------------------------+------+ - "###); + @r" + +----------+---------------------------------+------+ + | shape_id | points | tags | + +----------+---------------------------------+------+ + | 1 | [{x: 5, y: -8}, {x: -3, y: -4}] | tag1 | + | 2 | [{x: 6, y: 2}, {x: -2, y: -8}] | tag1 | + | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag1 | + | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag2 | + | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag3 | + | 3 | [{x: -9, y: -7}, {x: -2, y: 5}] | tag4 | + | 4 | | tag1 | + | 4 | | tag2 | + | 4 | | tag3 | + +----------+---------------------------------+------+ + "); // Test aggregate results for tags. let df = table_with_nested_types(NUM_ROWS).await?; @@ -4090,19 +4086,19 @@ async fn unnest_columns() -> Result<()> { let results = df.unnest_columns(&["points"])?.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" - +----------+----------------+--------------------------+ - | shape_id | points | tags | - +----------+----------------+--------------------------+ - | 1 | {x: -3, y: -4} | [tag1] | - | 1 | {x: 5, y: -8} | [tag1] | - | 2 | {x: -2, y: -8} | [tag1] | - | 2 | {x: 6, y: 2} | [tag1] | - | 3 | {x: -2, y: 5} | [tag1, tag2, tag3, tag4] | - | 3 | {x: -9, y: -7} | [tag1, tag2, tag3, tag4] | - | 4 | | [tag1, tag2, tag3] | - +----------+----------------+--------------------------+ - "###); + @r" + +----------+----------------+--------------------------+ + | shape_id | points | tags | + +----------+----------------+--------------------------+ + | 1 | {x: -3, y: -4} | [tag1] | + | 1 | {x: 5, y: -8} | [tag1] | + | 2 | {x: -2, y: -8} | [tag1] | + | 2 | {x: 6, y: 2} | [tag1] | + | 3 | {x: -2, y: 5} | [tag1, tag2, tag3, tag4] | + | 3 | {x: -9, y: -7} | [tag1, tag2, tag3, tag4] | + | 4 | | [tag1, tag2, tag3] | + +----------+----------------+--------------------------+ + "); // Test aggregate results for points. 
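    // Editor's note, not part of this diff: in the snapshots above,
    // `unnest_columns(&["points"])` expands each element of the `points` list into its
    // own row while repeating the remaining columns, which is why shapes 1-3 contribute
    // two rows each and shape 4 (a NULL list) is kept as a single row with a NULL
    // `points` value. The `unnest_column_nulls` test further down shows the alternative:
    // `UnnestOptions::new().with_preserve_nulls(false)` drops NULL-list rows entirely
    // instead of keeping them.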
let df = table_with_nested_types(NUM_ROWS).await?; @@ -4118,27 +4114,27 @@ async fn unnest_columns() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" - +----------+----------------+------+ - | shape_id | points | tags | - +----------+----------------+------+ - | 1 | {x: -3, y: -4} | tag1 | - | 1 | {x: 5, y: -8} | tag1 | - | 2 | {x: -2, y: -8} | tag1 | - | 2 | {x: 6, y: 2} | tag1 | - | 3 | {x: -2, y: 5} | tag1 | - | 3 | {x: -2, y: 5} | tag2 | - | 3 | {x: -2, y: 5} | tag3 | - | 3 | {x: -2, y: 5} | tag4 | - | 3 | {x: -9, y: -7} | tag1 | - | 3 | {x: -9, y: -7} | tag2 | - | 3 | {x: -9, y: -7} | tag3 | - | 3 | {x: -9, y: -7} | tag4 | - | 4 | | tag1 | - | 4 | | tag2 | - | 4 | | tag3 | - +----------+----------------+------+ - "###); + @r" + +----------+----------------+------+ + | shape_id | points | tags | + +----------+----------------+------+ + | 1 | {x: -3, y: -4} | tag1 | + | 1 | {x: 5, y: -8} | tag1 | + | 2 | {x: -2, y: -8} | tag1 | + | 2 | {x: 6, y: 2} | tag1 | + | 3 | {x: -2, y: 5} | tag1 | + | 3 | {x: -2, y: 5} | tag2 | + | 3 | {x: -2, y: 5} | tag3 | + | 3 | {x: -2, y: 5} | tag4 | + | 3 | {x: -9, y: -7} | tag1 | + | 3 | {x: -9, y: -7} | tag2 | + | 3 | {x: -9, y: -7} | tag3 | + | 3 | {x: -9, y: -7} | tag4 | + | 4 | | tag1 | + | 4 | | tag2 | + | 4 | | tag3 | + +----------+----------------+------+ + "); // Test aggregate results for points and tags. let df = table_with_nested_types(NUM_ROWS).await?; @@ -4178,7 +4174,7 @@ async fn unnest_dict_encoded_columns() -> Result<()> { let results = df.collect().await.unwrap(); assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----------------+---------+ | make_array_expr | column1 | +-----------------+---------+ @@ -4186,7 +4182,7 @@ async fn unnest_dict_encoded_columns() -> Result<()> { | y | y | | z | z | +-----------------+---------+ - "### + " ); // make_array(dict_encoded_string,literal string) @@ -4206,7 +4202,7 @@ async fn unnest_dict_encoded_columns() -> Result<()> { let results = df.collect().await.unwrap(); assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----------------+---------+ | make_array_expr | column1 | +-----------------+---------+ @@ -4217,7 +4213,7 @@ async fn unnest_dict_encoded_columns() -> Result<()> { | z | z | | fixed_string | z | +-----------------+---------+ - "### + " ); Ok(()) } @@ -4228,7 +4224,7 @@ async fn unnest_column_nulls() -> Result<()> { let results = df.clone().collect().await?; assert_snapshot!( batches_to_string(&results), - @r###" + @r" +--------+----+ | list | id | +--------+----+ @@ -4237,7 +4233,7 @@ async fn unnest_column_nulls() -> Result<()> { | [] | C | | [3] | D | +--------+----+ - "### + " ); // Unnest, preserving nulls (row with B is preserved) @@ -4250,7 +4246,7 @@ async fn unnest_column_nulls() -> Result<()> { .await?; assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------+----+ | list | id | +------+----+ @@ -4259,7 +4255,7 @@ async fn unnest_column_nulls() -> Result<()> { | | B | | 3 | D | +------+----+ - "### + " ); let options = UnnestOptions::new().with_preserve_nulls(false); @@ -4269,7 +4265,7 @@ async fn unnest_column_nulls() -> Result<()> { .await?; assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------+----+ | list | id | +------+----+ @@ -4277,7 +4273,7 @@ async fn unnest_column_nulls() -> Result<()> { | 2 | A | | 3 | D | +------+----+ - "### + " ); Ok(()) @@ -4294,7 +4290,7 @@ async fn unnest_fixed_list() -> Result<()> { let results = df.clone().collect().await?; assert_snapshot!( 
batches_to_sort_string(&results), - @r###" + @r" +----------+----------------+ | shape_id | tags | +----------+----------------+ @@ -4305,7 +4301,7 @@ async fn unnest_fixed_list() -> Result<()> { | 5 | [tag51, tag52] | | 6 | [tag61, tag62] | +----------+----------------+ - "### + " ); let options = UnnestOptions::new().with_preserve_nulls(true); @@ -4316,7 +4312,7 @@ async fn unnest_fixed_list() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+-------+ | shape_id | tags | +----------+-------+ @@ -4331,7 +4327,7 @@ async fn unnest_fixed_list() -> Result<()> { | 6 | tag61 | | 6 | tag62 | +----------+-------+ - "### + " ); Ok(()) @@ -4348,7 +4344,7 @@ async fn unnest_fixed_list_drop_nulls() -> Result<()> { let results = df.clone().collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+----------------+ | shape_id | tags | +----------+----------------+ @@ -4359,7 +4355,7 @@ async fn unnest_fixed_list_drop_nulls() -> Result<()> { | 5 | [tag51, tag52] | | 6 | [tag61, tag62] | +----------+----------------+ - "### + " ); let options = UnnestOptions::new().with_preserve_nulls(false); @@ -4370,7 +4366,7 @@ async fn unnest_fixed_list_drop_nulls() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+-------+ | shape_id | tags | +----------+-------+ @@ -4383,7 +4379,7 @@ async fn unnest_fixed_list_drop_nulls() -> Result<()> { | 6 | tag61 | | 6 | tag62 | +----------+-------+ - "### + " ); Ok(()) @@ -4419,7 +4415,7 @@ async fn unnest_fixed_list_non_null() -> Result<()> { let results = df.clone().collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+----------------+ | shape_id | tags | +----------+----------------+ @@ -4430,7 +4426,7 @@ async fn unnest_fixed_list_non_null() -> Result<()> { | 5 | [tag51, tag52] | | 6 | [tag61, tag62] | +----------+----------------+ - "### + " ); let options = UnnestOptions::new().with_preserve_nulls(true); @@ -4440,7 +4436,7 @@ async fn unnest_fixed_list_non_null() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+-------+ | shape_id | tags | +----------+-------+ @@ -4457,7 +4453,7 @@ async fn unnest_fixed_list_non_null() -> Result<()> { | 6 | tag61 | | 6 | tag62 | +----------+-------+ - "### + " ); Ok(()) @@ -4471,17 +4467,17 @@ async fn unnest_aggregate_columns() -> Result<()> { let results = df.select_columns(&["tags"])?.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" - +--------------------------+ - | tags | - +--------------------------+ - | [tag1, tag2, tag3, tag4] | - | [tag1, tag2, tag3] | - | [tag1, tag2] | - | [tag1] | - | [tag1] | - +--------------------------+ - "### + @r" + +--------------------------+ + | tags | + +--------------------------+ + | [tag1, tag2, tag3, tag4] | + | [tag1, tag2, tag3] | + | [tag1, tag2] | + | [tag1] | + | [tag1] | + +--------------------------+ + " ); let df = table_with_nested_types(NUM_ROWS).await?; @@ -4492,13 +4488,13 @@ async fn unnest_aggregate_columns() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +-------------+ | count(tags) | +-------------+ | 11 | +-------------+ - "### + " ); Ok(()) @@ -4571,7 +4567,7 @@ async fn unnest_array_agg() -> Result<()> { assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+--------+ | shape_id | tag_id | +----------+--------+ @@ -4585,7 +4581,7 @@ 
async fn unnest_array_agg() -> Result<()> { | 3 | 32 | | 3 | 33 | +----------+--------+ - "### + " ); // Doing an `array_agg` by `shape_id` produces: @@ -4599,7 +4595,7 @@ async fn unnest_array_agg() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+--------------+ | shape_id | tag_id | +----------+--------------+ @@ -4607,7 +4603,7 @@ async fn unnest_array_agg() -> Result<()> { | 2 | [21, 22, 23] | | 3 | [31, 32, 33] | +----------+--------------+ - "### + " ); // Unnesting again should produce the original batch. @@ -4623,7 +4619,7 @@ async fn unnest_array_agg() -> Result<()> { .await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+--------+ | shape_id | tag_id | +----------+--------+ @@ -4637,7 +4633,7 @@ async fn unnest_array_agg() -> Result<()> { | 3 | 32 | | 3 | 33 | +----------+--------+ - "### + " ); Ok(()) @@ -4667,7 +4663,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { let results = df.clone().collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+--------+ | shape_id | tag_id | +----------+--------+ @@ -4681,7 +4677,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { | 3 | 32 | | 3 | 33 | +----------+--------+ - "### + " ); // Doing an `array_agg` by `shape_id` produces: @@ -4711,7 +4707,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----------+ | shape_id | +----------+ @@ -4725,7 +4721,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { | 3 | | 3 | +----------+ - "### + " ); Ok(()) @@ -4766,7 +4762,7 @@ async fn unnest_multiple_columns() -> Result<()> { // string: a, b, c, d assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------+------------+------------+--------+ | list | large_list | fixed_list | string | +------+------------+------------+--------+ @@ -4780,7 +4776,7 @@ async fn unnest_multiple_columns() -> Result<()> { | | | 4 | c | | | | | d | +------+------------+------------+--------+ - "### + " ); // Test with `preserve_nulls = false`` @@ -4797,7 +4793,7 @@ async fn unnest_multiple_columns() -> Result<()> { // string: a, b, c, d assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------+------------+------------+--------+ | list | large_list | fixed_list | string | +------+------------+------------+--------+ @@ -4810,7 +4806,7 @@ async fn unnest_multiple_columns() -> Result<()> { | | | 3 | c | | | | 4 | c | +------+------------+------------+--------+ - "### + " ); Ok(()) @@ -4839,7 +4835,7 @@ async fn unnest_non_nullable_list() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +----+ | c1 | +----+ @@ -4847,7 +4843,7 @@ async fn unnest_non_nullable_list() -> Result<()> { | 2 | | | +----+ - "### + " ); Ok(()) @@ -4892,7 +4888,7 @@ async fn test_read_batches() -> Result<()> { let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----+--------+ | id | number | +----+--------+ @@ -4905,7 +4901,7 @@ async fn test_read_batches() -> Result<()> { | 5 | 3.33 | | 5 | 6.66 | +----+--------+ - "### + " ); Ok(()) } @@ -4926,10 +4922,10 @@ async fn test_read_batches_empty() -> Result<()> { let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" ++ ++ - "### + " ); Ok(()) } @@ -4978,14 +4974,14 @@ async fn consecutive_projection_same_schema() -> Result<()> { let 
results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +----+----+----+ | id | t | t2 | +----+----+----+ | 0 | | | | 1 | 10 | 10 | +----+----+----+ - "### + " ); Ok(()) @@ -5299,13 +5295,13 @@ async fn test_array_agg() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-------------------------------------+ | array_agg(test.a) | +-------------------------------------+ | [abcDEF, abc123, CBAdef, 123AbcDef] | +-------------------------------------+ - "### + " ); Ok(()) @@ -5373,10 +5369,10 @@ async fn test_dataframe_placeholder_missing_param_values() -> Result<()> { // N.B., the test is basically `SELECT 1 as a WHERE a = 3;` which returns no results. assert_snapshot!( batches_to_string(&df.collect().await.unwrap()), - @r###" + @r" ++ ++ - "### + " ); Ok(()) @@ -5425,20 +5421,20 @@ async fn test_dataframe_placeholder_column_parameter() -> Result<()> { assert_snapshot!( actual, @r" - Projection: Int32(3) AS $1 [$1:Null;N] + Projection: Int32(3) AS $1 [$1:Int32] EmptyRelation: rows=1 [] " ); assert_snapshot!( batches_to_string(&df.collect().await.unwrap()), - @r###" + @r" +----+ | $1 | +----+ | 3 | +----+ - "### + " ); Ok(()) @@ -5505,13 +5501,13 @@ async fn test_dataframe_placeholder_like_expression() -> Result<()> { assert_snapshot!( batches_to_string(&df.collect().await.unwrap()), - @r###" + @r" +-----+ | a | +-----+ | foo | +-----+ - "### + " ); Ok(()) @@ -5569,13 +5565,13 @@ async fn write_partitioned_parquet_results() -> Result<()> { let results = filter_df.collect().await?; assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----+ | c1 | +-----+ | abc | +-----+ - "### + " ); // Read the entire set of parquet files @@ -5591,14 +5587,14 @@ async fn write_partitioned_parquet_results() -> Result<()> { let results = df.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +-----+-----+ | c1 | c2 | +-----+-----+ | abc | 123 | | def | 456 | +-----+-----+ - "### + " ); Ok(()) @@ -5755,7 +5751,7 @@ async fn sparse_union_is_null() { // view_all assert_snapshot!( batches_to_sort_string(&df.clone().collect().await.unwrap()), - @r###" + @r" +----------+ | my_union | +----------+ @@ -5766,14 +5762,14 @@ async fn sparse_union_is_null() { | {C=a} | | {C=} | +----------+ - "### + " ); // filter where is null let result_df = df.clone().filter(col("my_union").is_null()).unwrap(); assert_snapshot!( batches_to_sort_string(&result_df.collect().await.unwrap()), - @r###" + @r" +----------+ | my_union | +----------+ @@ -5781,14 +5777,14 @@ async fn sparse_union_is_null() { | {B=} | | {C=} | +----------+ - "### + " ); // filter where is not null let result_df = df.filter(col("my_union").is_not_null()).unwrap(); assert_snapshot!( batches_to_sort_string(&result_df.collect().await.unwrap()), - @r###" + @r" +----------+ | my_union | +----------+ @@ -5796,7 +5792,7 @@ async fn sparse_union_is_null() { | {B=3.2} | | {C=a} | +----------+ - "### + " ); } @@ -5838,7 +5834,7 @@ async fn dense_union_is_null() { // view_all assert_snapshot!( batches_to_sort_string(&df.clone().collect().await.unwrap()), - @r###" + @r" +----------+ | my_union | +----------+ @@ -5849,14 +5845,14 @@ async fn dense_union_is_null() { | {C=a} | | {C=} | +----------+ - "### + " ); // filter where is null let result_df = df.clone().filter(col("my_union").is_null()).unwrap(); assert_snapshot!( batches_to_sort_string(&result_df.collect().await.unwrap()), - @r###" + @r" +----------+ | my_union | +----------+ @@ -5864,14 +5860,14 @@ async fn 
dense_union_is_null() { | {B=} | | {C=} | +----------+ - "### + " ); // filter where is not null let result_df = df.filter(col("my_union").is_not_null()).unwrap(); assert_snapshot!( batches_to_sort_string(&result_df.collect().await.unwrap()), - @r###" + @r" +----------+ | my_union | +----------+ @@ -5879,7 +5875,7 @@ async fn dense_union_is_null() { | {B=3.2} | | {C=a} | +----------+ - "### + " ); } @@ -5911,7 +5907,7 @@ async fn boolean_dictionary_as_filter() { // view_all assert_snapshot!( batches_to_string(&df.clone().collect().await.unwrap()), - @r###" + @r" +---------+ | my_dict | +---------+ @@ -5923,14 +5919,14 @@ async fn boolean_dictionary_as_filter() { | true | | false | +---------+ - "### + " ); let result_df = df.clone().filter(col("my_dict")).unwrap(); assert_snapshot!( batches_to_string(&result_df.collect().await.unwrap()), - @r###" + @r" +---------+ | my_dict | +---------+ @@ -5938,7 +5934,7 @@ async fn boolean_dictionary_as_filter() { | true | | true | +---------+ - "### + " ); // test nested dictionary @@ -5969,26 +5965,26 @@ async fn boolean_dictionary_as_filter() { // view_all assert_snapshot!( batches_to_string(&df.clone().collect().await.unwrap()), - @r###" + @r" +----------------+ | my_nested_dict | +----------------+ | true | | false | +----------------+ - "### + " ); let result_df = df.clone().filter(col("my_nested_dict")).unwrap(); assert_snapshot!( batches_to_string(&result_df.collect().await.unwrap()), - @r###" + @r" +----------------+ | my_nested_dict | +----------------+ | true | +----------------+ - "### + " ); } @@ -6066,11 +6062,11 @@ async fn test_alias() -> Result<()> { .into_unoptimized_plan() .display_indent_schema() .to_string(); - assert_snapshot!(plan, @r###" + assert_snapshot!(plan, @r" SubqueryAlias: table_alias [a:Utf8, b:Int32, one:Int32] Projection: test.a, test.b, Int32(1) AS one [a:Utf8, b:Int32, one:Int32] TableScan: test [a:Utf8, b:Int32] - "###); + "); // Select over the aliased DataFrame let df = df.select(vec![ @@ -6079,7 +6075,7 @@ async fn test_alias() -> Result<()> { ])?; assert_snapshot!( batches_to_sort_string(&df.collect().await.unwrap()), - @r###" + @r" +-----------+---------------------------------+ | a | table_alias.b + table_alias.one | +-----------+---------------------------------+ @@ -6088,7 +6084,7 @@ async fn test_alias() -> Result<()> { | abc123 | 11 | | abcDEF | 2 | +-----------+---------------------------------+ - "### + " ); Ok(()) } @@ -6118,7 +6114,7 @@ async fn test_alias_self_join() -> Result<()> { let joined = left.join(right, JoinType::Full, &["a"], &["a"], None)?; assert_snapshot!( batches_to_sort_string(&joined.collect().await.unwrap()), - @r###" + @r" +-----------+-----+-----------+-----+ | a | b | a | b | +-----------+-----+-----------+-----+ @@ -6127,7 +6123,7 @@ async fn test_alias_self_join() -> Result<()> { | abc123 | 10 | abc123 | 10 | | abcDEF | 1 | abcDEF | 1 | +-----------+-----+-----------+-----+ - "### + " ); Ok(()) } @@ -6140,14 +6136,14 @@ async fn test_alias_empty() -> Result<()> { .into_unoptimized_plan() .display_indent_schema() .to_string(); - assert_snapshot!(plan, @r###" + assert_snapshot!(plan, @r" SubqueryAlias: [a:Utf8, b:Int32] TableScan: test [a:Utf8, b:Int32] - "###); + "); assert_snapshot!( batches_to_sort_string(&df.select(vec![col("a"), col("b")])?.collect().await.unwrap()), - @r###" + @r" +-----------+-----+ | a | b | +-----------+-----+ @@ -6156,7 +6152,7 @@ async fn test_alias_empty() -> Result<()> { | abc123 | 10 | | abcDEF | 1 | +-----------+-----+ - "### + " ); Ok(()) @@ 
-6175,12 +6171,12 @@ async fn test_alias_nested() -> Result<()> { .into_optimized_plan()? .display_indent_schema() .to_string(); - assert_snapshot!(plan, @r###" + assert_snapshot!(plan, @r" SubqueryAlias: alias2 [a:Utf8, b:Int32, one:Int32] SubqueryAlias: alias1 [a:Utf8, b:Int32, one:Int32] Projection: test.a, test.b, Int32(1) AS one [a:Utf8, b:Int32, one:Int32] TableScan: test projection=[a, b] [a:Utf8, b:Int32] - "###); + "); // Select over the aliased DataFrame let select1 = df @@ -6189,7 +6185,7 @@ async fn test_alias_nested() -> Result<()> { assert_snapshot!( batches_to_sort_string(&select1.collect().await.unwrap()), - @r###" + @r" +-----------+-----------------------+ | a | alias2.b + alias2.one | +-----------+-----------------------+ @@ -6198,7 +6194,7 @@ async fn test_alias_nested() -> Result<()> { | abc123 | 11 | | abcDEF | 2 | +-----------+-----------------------+ - "### + " ); // Only the outermost alias is visible @@ -6318,7 +6314,10 @@ async fn test_insert_into_checking() -> Result<()> { .await .unwrap_err(); - assert_contains!(e.to_string(), "Inserting query schema mismatch: Expected table field 'a' with type Int64, but got 'column1' with type Utf8"); + assert_contains!( + e.to_string(), + "Inserting query schema mismatch: Expected table field 'a' with type Int64, but got 'column1' with type Utf8" + ); Ok(()) } @@ -6365,7 +6364,7 @@ async fn test_fill_null() -> Result<()> { let results = df_filled.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +---+---------+ | a | b | +---+---------+ @@ -6373,7 +6372,7 @@ async fn test_fill_null() -> Result<()> { | 1 | x | | 3 | z | +---+---------+ - "### + " ); Ok(()) @@ -6393,7 +6392,7 @@ async fn test_fill_null_all_columns() -> Result<()> { assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +---+---------+ | a | b | +---+---------+ @@ -6401,7 +6400,7 @@ async fn test_fill_null_all_columns() -> Result<()> { | 1 | x | | 3 | z | +---+---------+ - "### + " ); // Fill column "a" null values with a value that cannot be cast to Int32. @@ -6410,7 +6409,7 @@ async fn test_fill_null_all_columns() -> Result<()> { let results = df_filled.collect().await?; assert_snapshot!( batches_to_sort_string(&results), - @r###" + @r" +---+---------+ | a | b | +---+---------+ @@ -6418,7 +6417,7 @@ async fn test_fill_null_all_columns() -> Result<()> { | 1 | x | | 3 | z | +---+---------+ - "### + " ); Ok(()) } @@ -6450,7 +6449,10 @@ async fn test_insert_into_casting_support() -> Result<()> { .await .unwrap_err(); - assert_contains!(e.to_string(), "Inserting query schema mismatch: Expected table field 'a' with type Float16, but got 'a' with type Utf8."); + assert_contains!( + e.to_string(), + "Inserting query schema mismatch: Expected table field 'a' with type Float16, but got 'a' with type Utf8." + ); // Testing case2: // Inserting query schema mismatch: Expected table field 'a' with type Utf8View, but got 'a' with type Utf8. 
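// A minimal sketch, not part of the patch: most hunks above only swap the inline-snapshot
// delimiter from `@r###"…"###` to `@r"…"`. Both are ordinary Rust raw string literals handed
// to insta's `assert_snapshot!`; the hash-less form is sufficient whenever the snapshot text
// contains no `"` character, which holds for these ASCII table renderings. The module and
// test names below are illustrative only.
#[cfg(test)]
mod inline_snapshot_delimiter_sketch {
    #[test]
    fn hashless_and_hashed_delimiters_hold_the_same_snapshot() {
        // Identical snapshot text, two delimiter styles; both assertions pass.
        insta::assert_snapshot!("hello world", @r"hello world");
        insta::assert_snapshot!("hello world", @r###"hello world"###);
    }
}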
@@ -6488,14 +6490,14 @@ async fn test_insert_into_casting_support() -> Result<()> { assert_snapshot!( batches_to_string(&res), - @r###" + @r" +------+ | a | +------+ | a123 | | b456 | +------+ - "### + " ); Ok(()) } @@ -6631,13 +6633,13 @@ async fn test_copy_to_preserves_order() -> Result<()> { // Expect that input to the DataSinkExec is sorted correctly assert_snapshot!( physical_plan_format, - @r###" + @r" UnionExec DataSinkExec: sink=CsvSink(file_groups=[]) SortExec: expr=[column1@0 DESC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[1] DataSourceExec: partitions=1, partition_sizes=[1] - "### + " ); Ok(()) } diff --git a/datafusion/core/tests/datasource/object_store_access.rs b/datafusion/core/tests/datasource/object_store_access.rs index f89ca9e049147..2e1b1484076d9 100644 --- a/datafusion/core/tests/datasource/object_store_access.rs +++ b/datafusion/core/tests/datasource/object_store_access.rs @@ -98,6 +98,59 @@ async fn create_multi_file_csv_file() { ); } +#[tokio::test] +async fn multi_query_multi_file_csv_file() { + let test = Test::new().with_multi_file_csv().await; + assert_snapshot!( + test.query("select * from csv_table").await, + @r" + ------- Query Output (6 rows) ------- + +---------+-------+-------+ + | c1 | c2 | c3 | + +---------+-------+-------+ + | 0.0 | 0.0 | true | + | 0.00003 | 5e-12 | false | + | 0.00001 | 1e-12 | true | + | 0.00003 | 5e-12 | false | + | 0.00002 | 2e-12 | true | + | 0.00003 | 5e-12 | false | + +---------+-------+-------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 4 + - LIST prefix=data + - GET (opts) path=data/file_0.csv + - GET (opts) path=data/file_1.csv + - GET (opts) path=data/file_2.csv + " + ); + + // the second query should re-use the cached LIST results and should not reissue LIST + assert_snapshot!( + test.query("select * from csv_table").await, + @r" + ------- Query Output (6 rows) ------- + +---------+-------+-------+ + | c1 | c2 | c3 | + +---------+-------+-------+ + | 0.0 | 0.0 | true | + | 0.00003 | 5e-12 | false | + | 0.00001 | 1e-12 | true | + | 0.00003 | 5e-12 | false | + | 0.00002 | 2e-12 | true | + | 0.00003 | 5e-12 | false | + +---------+-------+-------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 4 + - LIST prefix=data + - GET (opts) path=data/file_0.csv + - GET (opts) path=data/file_1.csv + - GET (opts) path=data/file_2.csv + " + ); +} + #[tokio::test] async fn query_multi_csv_file() { let test = Test::new().with_multi_file_csv().await; @@ -145,17 +198,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 13 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 4 + - LIST prefix=data - GET (opts) path=data/a=1/b=10/c=100/file_1.csv - GET (opts) path=data/a=2/b=20/c=200/file_2.csv - GET (opts) path=data/a=3/b=30/c=300/file_3.csv @@ -174,10 +218,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store 
Request Summary ------- RequestCountingObjectStore() - Total Requests: 4 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 + Total Requests: 2 + - LIST prefix=data/a=2 - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -194,17 +236,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 11 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -221,17 +254,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 11 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -248,9 +272,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 3 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 + Total Requests: 2 + - LIST prefix=data/a=2/b=20 - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -267,17 +290,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 11 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=1/b=10/c=100/file_1.csv " ); diff --git a/datafusion/core/tests/execution/coop.rs b/datafusion/core/tests/execution/coop.rs index b6f406e967509..27dacf598c2c0 100644 --- a/datafusion/core/tests/execution/coop.rs +++ b/datafusion/core/tests/execution/coop.rs @@ -22,25 +22,25 @@ use datafusion::common::NullEquality; use datafusion::functions_aggregate::sum; use datafusion::physical_expr::aggregate::AggregateExprBuilder; use datafusion::physical_plan; +use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; 
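// A hedged sketch, not from this patch: the drop from 11-13 requests to 2-4 in the
// object_store_access.rs snapshots above appears to come down to the two listing modes of
// the `object_store` crate. `list` streams every object under a prefix in one recursive
// listing, while `list_with_delimiter` returns a single directory level, so a three-level
// Hive partition tree (`a=*/b=*/c=*`) costs roughly one delimited call per directory.
// The function below is illustrative only and uses the in-memory store for brevity.
use futures::TryStreamExt;
use object_store::{ObjectStore, memory::InMemory, path::Path};

async fn compare_listing_modes(store: &InMemory) -> object_store::Result<()> {
    // One recursive listing covers data/a=1/b=10/c=100/file_1.csv and all of its siblings.
    let recursive: Vec<_> = store.list(Some(&Path::from("data"))).try_collect().await?;

    // A delimited listing of the same prefix only yields the first-level "directories".
    let one_level = store.list_with_delimiter(Some(&Path::from("data"))).await?;

    println!(
        "recursive objects: {}, first-level prefixes: {}",
        recursive.len(),
        one_level.common_prefixes.len()
    );
    Ok(())
}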
use datafusion::physical_plan::execution_plan::Boundedness; -use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; -use datafusion_common::{exec_datafusion_err, DataFusionError, JoinType, ScalarValue}; +use datafusion_common::{DataFusionError, JoinType, ScalarValue, exec_datafusion_err}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr_common::operator::Operator; use datafusion_expr_common::operator::Operator::{Divide, Eq, Gt, Modulo}; use datafusion_functions_aggregate::min_max; +use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::expressions::{ - binary, col, lit, BinaryExpr, Column, Literal, + BinaryExpr, Column, Literal, binary, col, lit, }; -use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_optimizer::ensure_coop::EnsureCooperative; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::ensure_coop::EnsureCooperative; use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coop::make_cooperative; use datafusion_physical_plan::filter::FilterExec; @@ -64,13 +64,14 @@ use std::time::Duration; use tokio::runtime::{Handle, Runtime}; use tokio::select; -#[derive(Debug)] +#[derive(Debug, Clone)] struct RangeBatchGenerator { schema: SchemaRef, value_range: Range, boundedness: Boundedness, batch_size: usize, poll_count: usize, + original_range: Range, } impl std::fmt::Display for RangeBatchGenerator { @@ -110,6 +111,13 @@ impl LazyBatchGenerator for RangeBatchGenerator { RecordBatch::try_new(Arc::clone(&self.schema), vec![Arc::new(array)])?; Ok(Some(batch)) } + + fn reset_state(&self) -> Arc> { + let mut new = self.clone(); + new.poll_count = 0; + new.value_range = new.original_range.clone(); + Arc::new(RwLock::new(new)) + } } fn make_lazy_exec(column_name: &str, pretend_infinite: bool) -> LazyMemoryExec { @@ -136,16 +144,17 @@ fn make_lazy_exec_with_range( }; // Instantiate the generator with the batch and limit - let gen = RangeBatchGenerator { + let batch_gen = RangeBatchGenerator { schema: Arc::clone(&schema), boundedness, - value_range: range, + value_range: range.clone(), batch_size: 8192, poll_count: 0, + original_range: range, }; // Wrap the generator in a trait object behind Arc> - let generator: Arc> = Arc::new(RwLock::new(gen)); + let generator: Arc> = Arc::new(RwLock::new(batch_gen)); // Create a LazyMemoryExec with one partition using our generator let mut exec = LazyMemoryExec::try_new(schema, vec![generator]).unwrap(); @@ -170,7 +179,7 @@ async fn agg_no_grouping_yields( let inf = Arc::new(make_lazy_exec("value", pretend_infinite)); let aggr = Arc::new(AggregateExec::try_new( AggregateMode::Single, - PhysicalGroupBy::new(vec![], vec![], vec![]), + PhysicalGroupBy::new(vec![], vec![], vec![], false), vec![Arc::new( AggregateExprBuilder::new( sum::sum_udaf(), @@ -204,7 +213,7 @@ async fn agg_grouping_yields( let aggr = Arc::new(AggregateExec::try_new( AggregateMode::Single, - PhysicalGroupBy::new(vec![(group, "group".to_string())], vec![], vec![]), + PhysicalGroupBy::new(vec![(group, "group".to_string())], vec![], vec![], false), vec![Arc::new( AggregateExprBuilder::new(sum::sum_udaf(), vec![value_col.clone()]) .schema(inf.schema()) @@ -240,6 +249,7 @@ async fn agg_grouped_topk_yields( vec![(group, "group".to_string())], vec![], 
vec![vec![false]], + false, ), vec![Arc::new( AggregateExprBuilder::new(min_max::max_udaf(), vec![value_col.clone()]) @@ -545,6 +555,7 @@ async fn interleave_then_aggregate_yields( vec![], // no GROUP BY columns vec![], // no GROUP BY expressions vec![], // no GROUP BY physical expressions + false, ), vec![Arc::new(aggregate_expr)], vec![None], // no “distinct” flags @@ -653,7 +664,7 @@ async fn join_agg_yields( let proj_expr = vec![ProjectionExpr::new( Arc::new(Column::new_with_schema("value", &input_schema)?) as _, - "value".to_string(), + "value", )]; let projection = Arc::new(ProjectionExec::try_new(proj_expr, join)?); @@ -676,7 +687,7 @@ async fn join_agg_yields( let aggr = Arc::new(AggregateExec::try_new( AggregateMode::Single, - PhysicalGroupBy::new(vec![], vec![], vec![]), + PhysicalGroupBy::new(vec![], vec![], vec![], false), vec![Arc::new(aggregate_expr)], vec![None], projection, diff --git a/datafusion/core/tests/execution/datasource_split.rs b/datafusion/core/tests/execution/datasource_split.rs index 0b90c6f326168..370249cd8044e 100644 --- a/datafusion/core/tests/execution/datasource_split.rs +++ b/datafusion/core/tests/execution/datasource_split.rs @@ -22,7 +22,7 @@ use arrow::{ }; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_execution::TaskContext; -use datafusion_physical_plan::{common::collect, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, common::collect}; use std::sync::Arc; /// Helper function to create a memory source with the given batch size and collect all batches diff --git a/datafusion/core/tests/execution/logical_plan.rs b/datafusion/core/tests/execution/logical_plan.rs index ef2e263f2c467..3eaa3fb2ed5e6 100644 --- a/datafusion/core/tests/execution/logical_plan.rs +++ b/datafusion/core/tests/execution/logical_plan.rs @@ -20,7 +20,7 @@ use arrow::array::Int64Array; use arrow::datatypes::{DataType, Field, Schema}; -use datafusion::datasource::{provider_as_source, ViewTable}; +use datafusion::datasource::{ViewTable, provider_as_source}; use datafusion::execution::session_state::SessionStateBuilder; use datafusion_common::{Column, DFSchema, DFSchemaRef, Result, ScalarValue, Spans}; use datafusion_execution::TaskContext; diff --git a/datafusion/core/tests/execution/mod.rs b/datafusion/core/tests/execution/mod.rs index 8770b2a201051..f33ef87aa3023 100644 --- a/datafusion/core/tests/execution/mod.rs +++ b/datafusion/core/tests/execution/mod.rs @@ -18,3 +18,4 @@ mod coop; mod datasource_split; mod logical_plan; +mod register_arrow; diff --git a/datafusion/core/tests/execution/register_arrow.rs b/datafusion/core/tests/execution/register_arrow.rs new file mode 100644 index 0000000000000..4ce16dc0906c1 --- /dev/null +++ b/datafusion/core/tests/execution/register_arrow.rs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for register_arrow API + +use datafusion::{execution::options::ArrowReadOptions, prelude::*}; +use datafusion_common::Result; + +#[tokio::test] +async fn test_register_arrow_auto_detects_format() -> Result<()> { + let ctx = SessionContext::new(); + + ctx.register_arrow( + "file_format", + "../../datafusion/datasource-arrow/tests/data/example.arrow", + ArrowReadOptions::default(), + ) + .await?; + + ctx.register_arrow( + "stream_format", + "../../datafusion/datasource-arrow/tests/data/example_stream.arrow", + ArrowReadOptions::default(), + ) + .await?; + + let file_result = ctx.sql("SELECT * FROM file_format ORDER BY f0").await?; + let stream_result = ctx.sql("SELECT * FROM stream_format ORDER BY f0").await?; + + let file_batches = file_result.collect().await?; + let stream_batches = stream_result.collect().await?; + + assert_eq!(file_batches.len(), stream_batches.len()); + assert_eq!(file_batches[0].schema(), stream_batches[0].schema()); + + let file_rows: usize = file_batches.iter().map(|b| b.num_rows()).sum(); + let stream_rows: usize = stream_batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(file_rows, stream_rows); + + Ok(()) +} + +#[tokio::test] +async fn test_register_arrow_join_file_and_stream() -> Result<()> { + let ctx = SessionContext::new(); + + ctx.register_arrow( + "file_table", + "../../datafusion/datasource-arrow/tests/data/example.arrow", + ArrowReadOptions::default(), + ) + .await?; + + ctx.register_arrow( + "stream_table", + "../../datafusion/datasource-arrow/tests/data/example_stream.arrow", + ArrowReadOptions::default(), + ) + .await?; + + let result = ctx + .sql( + "SELECT a.f0, a.f1, b.f0, b.f1 + FROM file_table a + JOIN stream_table b ON a.f0 = b.f0 + WHERE a.f0 <= 2 + ORDER BY a.f0", + ) + .await?; + let batches = result.collect().await?; + + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 2); + + Ok(()) +} diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 84e644480a4fd..90c1b96749b3c 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -16,17 +16,17 @@ // under the License. use arrow::array::{ - builder::{ListBuilder, StringBuilder}, ArrayRef, Int64Array, RecordBatch, StringArray, StructArray, + builder::{ListBuilder, StringBuilder}, }; use arrow::datatypes::{DataType, Field}; use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use datafusion::prelude::*; use datafusion_common::{DFSchema, ScalarValue}; +use datafusion_expr::ExprFunctionExt; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::NullTreatment; use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::ExprFunctionExt; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_functions_aggregate::first_last::first_value_udaf; use datafusion_functions_aggregate::sum::sum_udaf; @@ -36,6 +36,7 @@ use datafusion_optimizer::simplify_expressions::ExprSimplifier; use std::sync::{Arc, LazyLock}; mod parse_sql_expr; +#[expect(clippy::needless_pass_by_value)] mod simplification; #[test] @@ -384,6 +385,7 @@ async fn evaluate_agg_test(expr: Expr, expected_lines: Vec<&str>) { /// Converts the `Expr` to a `PhysicalExpr`, evaluates it against the provided /// `RecordBatch` and compares the result to the expected result. 
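// A hedged sketch, not from this patch: many hunks in these test files (coop.rs,
// datasource_split.rs, logical_plan.rs, expr_api/mod.rs, and the fuzz_cases files below)
// only re-sort `use` groups, e.g. `{ExecutionPlan, common::collect}` instead of
// `{common::collect, ExecutionPlan}` and `{Rng, rng}` instead of `{rng, Rng}`. This is
// consistent with rustfmt's 2024 style edition, which sorts items roughly byte-wise so
// uppercase type names come ahead of lowercase module paths; that motivation is an
// assumption on my part, not something stated in the patch.
//
// use rand::{rng, seq::SliceRandom, Rng};   // pre-2024 ordering, as in the removed lines
use rand::{Rng, rng, seq::SliceRandom}; // 2024-style ordering, as in the added lines

// Tiny usage so the sketch compiles against rand 0.9, the API style used in these tests.
fn shuffled_plus_noise(values: &mut [u32]) -> Option<u32> {
    let mut r = rng();
    values.shuffle(&mut r);
    values.first().map(|v| v.wrapping_add(r.random::<u32>() % 3))
}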
+#[expect(clippy::needless_pass_by_value)] fn evaluate_expr_test(expr: Expr, expected_lines: Vec<&str>) { let batch = &TEST_BATCH; let df_schema = DFSchema::try_from(batch.schema()).unwrap(); diff --git a/datafusion/core/tests/expr_api/parse_sql_expr.rs b/datafusion/core/tests/expr_api/parse_sql_expr.rs index 92c18204324f7..b0d8b3a349ae2 100644 --- a/datafusion/core/tests/expr_api/parse_sql_expr.rs +++ b/datafusion/core/tests/expr_api/parse_sql_expr.rs @@ -19,9 +19,9 @@ use arrow::datatypes::{DataType, Field, Schema}; use datafusion::prelude::{CsvReadOptions, SessionContext}; use datafusion_common::DFSchema; use datafusion_common::{DFSchemaRef, Result, ToDFSchema}; +use datafusion_expr::Expr; use datafusion_expr::col; use datafusion_expr::lit; -use datafusion_expr::Expr; use datafusion_sql::unparser::Unparser; /// A schema like: /// diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 46c36c6abdacc..a42dfc951da0d 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -24,15 +24,15 @@ use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use chrono::{DateTime, TimeZone, Utc}; use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; -use datafusion_common::cast::as_int32_array; use datafusion_common::ScalarValue; +use datafusion_common::cast::as_int32_array; use datafusion_common::{DFSchemaRef, ToDFSchema}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::logical_plan::builder::table_scan_with_filters; use datafusion_expr::simplify::SimplifyInfo; use datafusion_expr::{ - table_scan, Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, - ScalarUDF, Volatility, + Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, ScalarUDF, + Volatility, table_scan, }; use datafusion_functions::math; use datafusion_optimizer::optimizer::Optimizer; @@ -243,10 +243,10 @@ fn to_timestamp_expr_folded() -> Result<()> { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r#" Projection: TimestampNanosecond(1599566400000000000, None) AS to_timestamp(Utf8("2020-09-08T12:00:00+00:00")) TableScan: test - "### + "# ); Ok(()) } @@ -273,10 +273,10 @@ fn now_less_than_timestamp() -> Result<()> { assert_snapshot!( actual, - @r###" + @r" Filter: Boolean(true) TableScan: test - "### + " ); Ok(()) } @@ -312,10 +312,10 @@ fn select_date_plus_interval() -> Result<()> { assert_snapshot!( actual, - @r###" + @r#" Projection: Date32("2021-01-09") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("IntervalDayTime { days: 123, milliseconds: 0 }") TableScan: test - "### + "# ); Ok(()) } @@ -334,10 +334,10 @@ fn simplify_project_scalar_fn() -> Result<()> { let actual = formatter.trim(); assert_snapshot!( actual, - @r###" + @r" Projection: test.f AS power(test.f,Float64(1)) TableScan: test - "### + " ); Ok(()) } diff --git a/datafusion/core/tests/fifo/mod.rs b/datafusion/core/tests/fifo/mod.rs index 141a3f3b75586..36cc769417dbc 100644 --- a/datafusion/core/tests/fifo/mod.rs +++ b/datafusion/core/tests/fifo/mod.rs @@ -22,21 +22,21 @@ mod unix_test { use std::fs::File; use std::path::PathBuf; - use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use arrow::array::Array; use arrow::csv::ReaderBuilder; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use 
datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::datasource::TableProvider; + use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::{ prelude::{CsvReadOptions, SessionConfig, SessionContext}, test_util::{aggr_test_schema, arrow_test_data}, }; use datafusion_common::instant::Instant; - use datafusion_common::{exec_err, Result}; + use datafusion_common::{Result, exec_err}; use datafusion_expr::SortExpr; use futures::StreamExt; @@ -44,7 +44,7 @@ mod unix_test { use nix::unistd; use tempfile::TempDir; use tokio::io::AsyncWriteExt; - use tokio::task::{spawn_blocking, JoinHandle}; + use tokio::task::{JoinHandle, spawn_blocking}; /// Makes a TableProvider for a fifo file fn fifo_table( diff --git a/datafusion/core/tests/fuzz.rs b/datafusion/core/tests/fuzz.rs index 92646e8b37636..5e94f12b5805d 100644 --- a/datafusion/core/tests/fuzz.rs +++ b/datafusion/core/tests/fuzz.rs @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. -/// Run all tests that are found in the `fuzz_cases` directory +/// Run all tests that are found in the `fuzz_cases` directory. +/// Fuzz tests are slow and gated behind the `extended_tests` feature. +/// Run with: cargo test --features extended_tests +#[cfg(feature = "extended_tests")] mod fuzz_cases; #[cfg(test)] diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 4e04da26f70b6..97d1db5728cf3 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -24,37 +24,37 @@ use crate::fuzz_cases::aggregation_fuzzer::{ }; use arrow::array::{ - types::Int64Type, Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch, - StringArray, + Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch, StringArray, + types::Int64Type, }; use arrow::compute::concat_batches; use arrow::datatypes::DataType; use arrow::util::pretty::pretty_format_batches; use arrow_schema::{Field, Schema, SchemaRef}; +use datafusion::datasource::MemTable; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::MemTable; use datafusion::prelude::{DataFrame, SessionConfig, SessionContext}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::{HashMap, Result}; use datafusion_common_runtime::JoinSet; use datafusion_functions_aggregate::sum::sum_udaf; -use datafusion_physical_expr::expressions::{col, lit, Column}; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::expressions::{Column, col, lit}; use datafusion_physical_plan::InputOrderMode; -use test_utils::{add_empty_batches, StringBatchGenerator}; +use test_utils::{StringBatchGenerator, add_empty_batches}; +use datafusion_execution::TaskContext; use datafusion_execution::memory_pool::FairSpillPool; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_execution::TaskContext; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::metrics::MetricValue; -use datafusion_physical_plan::{collect, displayable, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, collect, displayable}; use rand::rngs::StdRng; -use rand::{random, rng, Rng, SeedableRng}; +use rand::{Rng, 
SeedableRng, random, rng}; // ======================================================================== // The new aggregation fuzz tests based on [`AggregationFuzzer`] @@ -326,15 +326,14 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str .unwrap(), ); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(sum_udaf(), vec![col("d", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("sum1") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(sum_udaf(), vec![col("d", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("sum1") + .build() + .map(Arc::new) + .unwrap(), + ]; let expr = group_by_columns .iter() .map(|elem| (col(elem, &schema).unwrap(), (*elem).to_string())) @@ -650,7 +649,9 @@ pub(crate) fn assert_spill_count_metric( if expect_spill && spill_count == 0 { panic!("Expected spill but SpillCount metric not found or SpillCount was 0."); } else if !expect_spill && spill_count > 0 { - panic!("Expected no spill but found SpillCount metric with value greater than 0."); + panic!( + "Expected no spill but found SpillCount metric with value greater than 0." + ); } spill_count diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs index fa8ea0b31c023..bf71053d6c852 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs @@ -25,7 +25,7 @@ use datafusion_catalog::TableProvider; use datafusion_common::ScalarValue; use datafusion_common::{error::Result, utils::get_available_parallelism}; use datafusion_expr::col; -use rand::{rng, Rng}; +use rand::{Rng, rng}; use crate::fuzz_cases::aggregation_fuzzer::data_generator::Dataset; diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index aaf2d1b9bad4f..e49cffa89b04e 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -18,7 +18,7 @@ use arrow::array::RecordBatch; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; +use datafusion_physical_expr::{PhysicalSortExpr, expressions::col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::sorts::sort::sort_batch; use test_utils::stagger_batch; @@ -209,8 +209,8 @@ mod test { sort_keys_set: vec![vec!["b".to_string()]], }; - let mut gen = DatasetGenerator::new(config); - let datasets = gen.generate().unwrap(); + let mut data_gen = DatasetGenerator::new(config); + let datasets = data_gen.generate().unwrap(); // Should Generate 2 datasets assert_eq!(datasets.len(), 2); diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 1a8ef278cc299..430762b1c28db 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -19,9 +19,9 @@ use std::sync::Arc; use arrow::array::RecordBatch; use arrow::util::pretty::pretty_format_batches; -use datafusion_common::{internal_datafusion_err, Result}; +use datafusion_common::{Result, internal_datafusion_err}; use datafusion_common_runtime::JoinSet; -use rand::{rng, Rng}; +use 
rand::{Rng, rng}; use crate::fuzz_cases::aggregation_fuzzer::query_builder::QueryBuilder; use crate::fuzz_cases::aggregation_fuzzer::{ diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs index 766e2bedd74c2..0d04e98536f2a 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs @@ -17,7 +17,7 @@ use std::{collections::HashSet, str::FromStr}; -use rand::{rng, seq::SliceRandom, Rng}; +use rand::{Rng, rng, seq::SliceRandom}; /// Random aggregate query builder /// diff --git a/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs b/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs index 3049631d4b3fe..92adda200d1a5 100644 --- a/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use arrow::array::{cast::AsArray, Array, OffsetSizeTrait, RecordBatch}; +use arrow::array::{Array, OffsetSizeTrait, RecordBatch, cast::AsArray}; use datafusion::datasource::MemTable; use datafusion_common_runtime::JoinSet; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 171839b390ffa..a57095066ee12 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -16,19 +16,19 @@ // under the License. use crate::fuzz_cases::equivalence::utils::{ - create_random_schema, create_test_params, create_test_schema_2, + TestScalarUDF, create_random_schema, create_test_params, create_test_schema_2, generate_table_for_eq_properties, generate_table_for_orderings, - is_table_same_after_sort, TestScalarUDF, + is_table_same_after_sort, }; use arrow::compute::SortOptions; -use datafusion_common::config::ConfigOptions; use datafusion_common::Result; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{Operator, ScalarUDF}; +use datafusion_physical_expr::ScalarFunctionExpr; use datafusion_physical_expr::equivalence::{ convert_to_orderings, convert_to_sort_exprs, }; -use datafusion_physical_expr::expressions::{col, BinaryExpr}; -use datafusion_physical_expr::ScalarFunctionExpr; +use datafusion_physical_expr::expressions::{BinaryExpr, col}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index a72a1558b2e41..2f67e211ce915 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -16,15 +16,15 @@ // under the License. 
use crate::fuzz_cases::equivalence::utils::{ - apply_projection, create_random_schema, generate_table_for_eq_properties, - is_table_same_after_sort, TestScalarUDF, + TestScalarUDF, apply_projection, create_random_schema, + generate_table_for_eq_properties, is_table_same_after_sort, }; use arrow::compute::SortOptions; -use datafusion_common::config::ConfigOptions; use datafusion_common::Result; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::{col, BinaryExpr}; +use datafusion_physical_expr::expressions::{BinaryExpr, col}; use datafusion_physical_expr::{PhysicalExprRef, ScalarFunctionExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index 382c4da943219..1490eb08a0291 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -18,13 +18,13 @@ use std::sync::Arc; use crate::fuzz_cases::equivalence::utils::{ - create_random_schema, generate_table_for_eq_properties, is_table_same_after_sort, - TestScalarUDF, + TestScalarUDF, create_random_schema, generate_table_for_eq_properties, + is_table_same_after_sort, }; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; -use datafusion_physical_expr::expressions::{col, BinaryExpr}; +use datafusion_physical_expr::expressions::{BinaryExpr, col}; use datafusion_physical_expr::{LexOrdering, ScalarFunctionExpr}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index be35ddca8f02d..580a226721083 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -20,21 +20,21 @@ use std::cmp::Ordering; use std::sync::Arc; use arrow::array::{ArrayRef, Float32Array, Float64Array, RecordBatch, UInt32Array}; -use arrow::compute::{lexsort_to_indices, take_record_batch, SortColumn, SortOptions}; +use arrow::compute::{SortColumn, SortOptions, lexsort_to_indices, take_record_batch}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::utils::{compare_rows, get_row_at_idx}; -use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result}; +use datafusion_common::{Result, exec_err, internal_datafusion_err, plan_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; use datafusion_physical_expr::equivalence::{ - convert_to_orderings, EquivalenceClass, ProjectionMapping, + EquivalenceClass, ProjectionMapping, convert_to_orderings, }; use datafusion_physical_expr::{ConstExpr, EquivalenceProperties}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_plan::expressions::{col, Column}; +use datafusion_physical_plan::expressions::{Column, col}; use itertools::izip; use rand::prelude::*; diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index 
e8ff1ccf06704..ce422494db101 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -38,8 +38,8 @@ use datafusion::physical_plan::joins::{ }; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::{NullEquality, ScalarValue}; -use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::PhysicalExprRef; +use datafusion_physical_expr::expressions::Literal; use itertools::Itertools; use rand::Rng; @@ -91,484 +91,564 @@ fn col_lt_col_filter(schema1: Arc, schema2: Arc) -> JoinFilter { #[tokio::test] async fn test_inner_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Inner, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Inner, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_inner_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Inner, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Inner, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Left, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Left, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Left, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Left, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Right, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Right, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Right, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, 
true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Right, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_full_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Full, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Full, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_full_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::Full, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[NljHj, HjSmj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::Full, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[NljHj, HjSmj], false) + .await + } } #[tokio::test] async fn test_left_semi_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::LeftSemi, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::LeftSemi, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_semi_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::LeftSemi, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::LeftSemi, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_semi_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::RightSemi, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::RightSemi, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_semi_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::RightSemi, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::RightSemi, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } 
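// A minimal sketch, not from the patch: the join fuzz tests above now wrap each case in a
// loop over three (left_extra, right_extra) column layouts rather than building a single
// symmetric pair of inputs. The helper below is a hypothetical stand-in showing only that
// driver shape; `build_and_check` plays the role of constructing a JoinFuzzTestCase and
// awaiting run_test on it.
async fn exercise_column_layouts<F, Fut>(mut build_and_check: F)
where
    F: FnMut(bool, bool) -> Fut,
    Fut: std::future::Future<Output = ()>,
{
    for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] {
        build_and_check(left_extra, right_extra).await;
    }
}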
#[tokio::test] async fn test_left_anti_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::LeftAnti, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::LeftAnti, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_anti_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::LeftAnti, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::LeftAnti, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_anti_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::RightAnti, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::RightAnti, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_anti_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::RightAnti, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::RightAnti, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_mark_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::LeftMark, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::LeftMark, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_mark_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::LeftMark, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::LeftMark, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } // todo: add JoinTestType::HjSmj after Right mark SortMergeJoin support #[tokio::test] async fn test_right_mark_join_1k() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::RightMark, - None, 
- ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::RightMark, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_mark_join_1k_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_i32(1000), - make_staggered_batches_i32(1000), - JoinType::RightMark, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_i32(1000, left_extra), + make_staggered_batches_i32(1000, right_extra), + JoinType::RightMark, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_inner_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Inner, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Inner, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_inner_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Inner, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Inner, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Left, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Left, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Left, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Left, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Right, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + 
make_staggered_batches_binary(1000, right_extra), + JoinType::Right, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Right, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Right, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_full_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Full, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Full, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_full_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::Full, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[NljHj, HjSmj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::Full, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[NljHj, HjSmj], false) + .await + } } #[tokio::test] async fn test_left_semi_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::LeftSemi, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::LeftSemi, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_semi_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::LeftSemi, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::LeftSemi, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_semi_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::RightSemi, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::RightSemi, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn 
test_right_semi_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::RightSemi, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::RightSemi, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_anti_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::LeftAnti, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::LeftAnti, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_anti_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::LeftAnti, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::LeftAnti, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_anti_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::RightAnti, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::RightAnti, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_anti_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::RightAnti, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::RightAnti, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_mark_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::LeftMark, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::LeftMark, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_left_mark_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - 
make_staggered_batches_binary(1000), - JoinType::LeftMark, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::LeftMark, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } // todo: add JoinTestType::HjSmj after Right mark SortMergeJoin support #[tokio::test] async fn test_right_mark_join_1k_binary() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::RightMark, - None, - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::RightMark, + None, + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } #[tokio::test] async fn test_right_mark_join_1k_binary_filtered() { - JoinFuzzTestCase::new( - make_staggered_batches_binary(1000), - make_staggered_batches_binary(1000), - JoinType::RightMark, - Some(Box::new(col_lt_col_filter)), - ) - .run_test(&[HjSmj, NljHj], false) - .await + for (left_extra, right_extra) in [(true, true), (false, true), (true, false)] { + JoinFuzzTestCase::new( + make_staggered_batches_binary(1000, left_extra), + make_staggered_batches_binary(1000, right_extra), + JoinType::RightMark, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[HjSmj, NljHj], false) + .await + } } type JoinFilterBuilder = Box, Arc) -> JoinFilter>; @@ -841,7 +921,9 @@ impl JoinFuzzTestCase { std::fs::remove_dir_all(fuzz_debug).unwrap_or(()); std::fs::create_dir_all(fuzz_debug).unwrap(); let out_dir_name = &format!("{fuzz_debug}/batch_size_{batch_size}"); - println!("Test result data mismatch found. HJ rows {hj_rows}, SMJ rows {smj_rows}, NLJ rows {nlj_rows}"); + println!( + "Test result data mismatch found. HJ rows {hj_rows}, SMJ rows {smj_rows}, NLJ rows {nlj_rows}" + ); println!("The debug is ON. 
Input data will be saved to {out_dir_name}"); Self::save_partitioned_batches_as_parquet( @@ -892,10 +974,18 @@ impl JoinFuzzTestCase { } if join_tests.contains(&NljHj) { - let err_msg_rowcnt = format!("NestedLoopJoinExec and HashJoinExec produced different row counts, batch_size: {batch_size}"); + let err_msg_rowcnt = format!( + "NestedLoopJoinExec and HashJoinExec produced different row counts, batch_size: {batch_size}" + ); assert_eq!(nlj_rows, hj_rows, "{}", err_msg_rowcnt.as_str()); + if nlj_rows == 0 && hj_rows == 0 { + // both joins returned no rows, skip content comparison + continue; + } - let err_msg_contents = format!("NestedLoopJoinExec and HashJoinExec produced different results, batch_size: {batch_size}"); + let err_msg_contents = format!( + "NestedLoopJoinExec and HashJoinExec produced different results, batch_size: {batch_size}" + ); // row level compare if any of joins returns the result // the reason is different formatting when there is no rows for (i, (nlj_line, hj_line)) in nlj_formatted_sorted @@ -913,10 +1003,16 @@ impl JoinFuzzTestCase { } if join_tests.contains(&HjSmj) { - let err_msg_row_cnt = format!("HashJoinExec and SortMergeJoinExec produced different row counts, batch_size: {}", &batch_size); + let err_msg_row_cnt = format!( + "HashJoinExec and SortMergeJoinExec produced different row counts, batch_size: {}", + &batch_size + ); assert_eq!(hj_rows, smj_rows, "{}", err_msg_row_cnt.as_str()); - let err_msg_contents = format!("SortMergeJoinExec and HashJoinExec produced different results, batch_size: {}", &batch_size); + let err_msg_contents = format!( + "SortMergeJoinExec and HashJoinExec produced different results, batch_size: {}", + &batch_size + ); // row level compare if any of joins returns the result // the reason is different formatting when there is no rows if smj_rows > 0 || hj_rows > 0 { @@ -1031,7 +1127,7 @@ impl JoinFuzzTestCase { /// Return randomly sized record batches with: /// two sorted int32 columns 'a', 'b' ranged from 0..99 as join columns /// two random int32 columns 'x', 'y' as other columns -fn make_staggered_batches_i32(len: usize) -> Vec { +fn make_staggered_batches_i32(len: usize, with_extra_column: bool) -> Vec { let mut rng = rand::rng(); let mut input12: Vec<(i32, i32)> = vec![(0, 0); len]; let mut input3: Vec = vec![0; len]; @@ -1047,14 +1143,18 @@ fn make_staggered_batches_i32(len: usize) -> Vec { let input3 = Int32Array::from_iter_values(input3); let input4 = Int32Array::from_iter_values(input4); - // split into several record batches - let batch = RecordBatch::try_from_iter(vec![ + let mut columns = vec![ ("a", Arc::new(input1) as ArrayRef), ("b", Arc::new(input2) as ArrayRef), ("x", Arc::new(input3) as ArrayRef), - ("y", Arc::new(input4) as ArrayRef), - ]) - .unwrap(); + ]; + + if with_extra_column { + columns.push(("y", Arc::new(input4) as ArrayRef)); + } + + // split into several record batches + let batch = RecordBatch::try_from_iter(columns).unwrap(); // use a random number generator to pick a random sized output stagger_batch_with_seed(batch, 42) @@ -1070,7 +1170,10 @@ fn rand_bytes(rng: &mut R, min: usize, max: usize) -> Vec { /// Return randomly sized record batches with: /// two sorted binary columns 'a', 'b' (lexicographically) as join columns /// two random binary columns 'x', 'y' as other columns -fn make_staggered_batches_binary(len: usize) -> Vec { +fn make_staggered_batches_binary( + len: usize, + with_extra_column: bool, +) -> Vec { let mut rng = rand::rng(); // produce (a,b) pairs then sort lexicographically so 
SMJ has naturally sorted keys @@ -1088,13 +1191,17 @@ fn make_staggered_batches_binary(len: usize) -> Vec { let x = BinaryArray::from_iter_values(input3.iter()); let y = BinaryArray::from_iter_values(input4.iter()); - let batch = RecordBatch::try_from_iter(vec![ + let mut columns = vec![ ("a", Arc::new(a) as ArrayRef), ("b", Arc::new(b) as ArrayRef), ("x", Arc::new(x) as ArrayRef), - ("y", Arc::new(y) as ArrayRef), - ]) - .unwrap(); + ]; + + if with_extra_column { + columns.push(("y", Arc::new(y) as ArrayRef)); + } + + let batch = RecordBatch::try_from_iter(columns).unwrap(); // preserve your existing randomized partitioning stagger_batch_with_seed(batch, 42) diff --git a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs index 4c5ebf0402414..1c5741e7a21b3 100644 --- a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs @@ -24,7 +24,7 @@ use arrow::util::pretty::pretty_format_batches; use datafusion::datasource::MemTable; use datafusion::prelude::SessionContext; use datafusion_common::assert_contains; -use rand::{rng, Rng}; +use rand::{Rng, rng}; use std::sync::Arc; use test_utils::stagger_batch; diff --git a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs index b92dec64e3f19..59430a98cc4b4 100644 --- a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs @@ -27,7 +27,7 @@ use arrow::{ use datafusion::datasource::memory::MemorySourceConfig; use datafusion::physical_plan::{ collect, - expressions::{col, PhysicalSortExpr}, + expressions::{PhysicalSortExpr, col}, sorts::sort_preserving_merge::SortPreservingMergeExec, }; use datafusion::prelude::{SessionConfig, SessionContext}; diff --git a/datafusion/core/tests/fuzz_cases/mod.rs b/datafusion/core/tests/fuzz_cases/mod.rs index 9e2fd170f7f0c..edb53df382c62 100644 --- a/datafusion/core/tests/fuzz_cases/mod.rs +++ b/datafusion/core/tests/fuzz_cases/mod.rs @@ -15,20 +15,26 @@ // specific language governing permissions and limitations // under the License. 
+#[expect(clippy::needless_pass_by_value)] mod aggregate_fuzz; mod distinct_count_string_fuzz; +#[expect(clippy::needless_pass_by_value)] mod join_fuzz; mod merge_fuzz; +#[expect(clippy::needless_pass_by_value)] mod sort_fuzz; +#[expect(clippy::needless_pass_by_value)] mod sort_query_fuzz; mod topk_filter_pushdown; mod aggregation_fuzzer; +#[expect(clippy::needless_pass_by_value)] mod equivalence; mod pruning; mod limit_fuzz; +#[expect(clippy::needless_pass_by_value)] mod sort_preserving_repartition_fuzz; mod window_fuzz; diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs index f8bd4dbc1a768..8a84e4c5d1814 100644 --- a/datafusion/core/tests/fuzz_cases/pruning.rs +++ b/datafusion/core/tests/fuzz_cases/pruning.rs @@ -29,9 +29,9 @@ use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_physical_expr::PhysicalExpr; -use datafusion_physical_plan::{collect, filter::FilterExec, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, collect, filter::FilterExec}; use itertools::Itertools; -use object_store::{memory::InMemory, path::Path, ObjectStore, PutPayload}; +use object_store::{ObjectStore, PutPayload, memory::InMemory, path::Path}; use parquet::{ arrow::ArrowWriter, file::properties::{EnabledStatistics, WriterProperties}, @@ -276,13 +276,12 @@ async fn execute_with_predicate( ctx: &SessionContext, ) -> Vec { let parquet_source = if prune_stats { - ParquetSource::default().with_predicate(predicate.clone()) + ParquetSource::new(schema.clone()).with_predicate(predicate.clone()) } else { - ParquetSource::default() + ParquetSource::new(schema.clone()) }; let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("memory://").unwrap(), - schema.clone(), Arc::new(parquet_source), ) .with_file_group( diff --git a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs index 45dba5f7864b1..22b145f5095a7 100644 --- a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs +++ b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs @@ -19,23 +19,23 @@ use std::sync::Arc; use arrow::array::{ArrayRef, DictionaryArray, PrimitiveArray, RecordBatch}; use arrow::datatypes::{ - ArrowPrimitiveType, BooleanType, DataType, Date32Type, Date64Type, Decimal128Type, - Decimal256Type, Decimal32Type, Decimal64Type, DurationMicrosecondType, + ArrowPrimitiveType, BooleanType, DataType, Date32Type, Date64Type, Decimal32Type, + Decimal64Type, Decimal128Type, Decimal256Type, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType, Field, - Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, + Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, Schema, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, + UInt64Type, }; use arrow_schema::{ - DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, - DECIMAL256_MAX_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, - DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, + 
DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, DECIMAL64_MAX_PRECISION, + DECIMAL64_MAX_SCALE, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; -use datafusion_common::{arrow_datafusion_err, DataFusionError, Result}; -use rand::{rng, rngs::StdRng, Rng, SeedableRng}; +use datafusion_common::{Result, arrow_datafusion_err}; +use rand::{Rng, SeedableRng, rng, rngs::StdRng}; use test_utils::array_gen::{ BinaryArrayGenerator, BooleanArrayGenerator, DecimalArrayGenerator, PrimitiveArrayGenerator, StringArrayGenerator, diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index 28d28a6622a76..0d8a066d432dd 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow::{ - array::{as_string_array, ArrayRef, Int32Array, StringArray}, + array::{ArrayRef, Int32Array, StringArray, as_string_array}, compute::SortOptions, record_batch::RecordBatch, }; @@ -28,7 +28,7 @@ use datafusion::datasource::memory::MemorySourceConfig; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::{collect, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::cast::as_int32_array; use datafusion_execution::memory_pool::GreedyMemoryPool; diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index 99b20790fc46b..c424a314270c6 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -20,34 +20,33 @@ mod sp_repartition_fuzz_tests { use std::sync::Arc; use arrow::array::{ArrayRef, Int64Array, RecordBatch, UInt64Array}; - use arrow::compute::{concat_batches, lexsort, SortColumn, SortOptions}; + use arrow::compute::{SortColumn, SortOptions, concat_batches, lexsort}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; use datafusion::physical_plan::{ - collect, + ExecutionPlan, Partitioning, collect, metrics::{BaselineMetrics, ExecutionPlanMetricsSet}, repartition::RepartitionExec, sorts::sort_preserving_merge::SortPreservingMergeExec, sorts::streaming_merge::StreamingMergeBuilder, stream::RecordBatchStreamAdapter, - ExecutionPlan, Partitioning, }; use datafusion::prelude::SessionContext; use datafusion_common::Result; use datafusion_execution::{config::SessionConfig, memory_pool::MemoryConsumer}; + use datafusion_physical_expr::ConstExpr; use datafusion_physical_expr::equivalence::{ EquivalenceClass, EquivalenceProperties, }; - use datafusion_physical_expr::expressions::{col, Column}; - use datafusion_physical_expr::ConstExpr; + use datafusion_physical_expr::expressions::{Column, col}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use test_utils::add_empty_batches; use itertools::izip; - use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; + use rand::{Rng, SeedableRng, rngs::StdRng, seq::SliceRandom}; // Generate a schema which consists of 6 columns (a, b, c, d, 
e, f) fn create_test_schema() -> Result { diff --git a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs index 2ce7db3ea4bc7..376306f3e0659 100644 --- a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs @@ -24,24 +24,22 @@ use arrow::array::RecordBatch; use arrow_schema::SchemaRef; use datafusion::datasource::MemTable; use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_common::{instant::Instant, Result}; +use datafusion_common::{Result, human_readable_size, instant::Instant}; use datafusion_execution::disk_manager::DiskManagerBuilder; -use datafusion_execution::memory_pool::{ - human_readable_size, MemoryPool, UnboundedMemoryPool, -}; +use datafusion_execution::memory_pool::{MemoryPool, UnboundedMemoryPool}; use datafusion_expr::display_schema; use datafusion_physical_plan::spill::get_record_batch_memory_size; use std::time::Duration; use datafusion_execution::{memory_pool::FairSpillPool, runtime_env::RuntimeEnvBuilder}; -use rand::prelude::IndexedRandom; use rand::Rng; -use rand::{rngs::StdRng, SeedableRng}; +use rand::prelude::IndexedRandom; +use rand::{SeedableRng, rngs::StdRng}; use crate::fuzz_cases::aggregation_fuzzer::check_equality_of_batches; use super::aggregation_fuzzer::ColumnDescr; -use super::record_batch_generator::{get_supported_types_columns, RecordBatchGenerator}; +use super::record_batch_generator::{RecordBatchGenerator, get_supported_types_columns}; /// Entry point for executing the sort query fuzzer. /// @@ -177,16 +175,16 @@ impl SortQueryFuzzer { n_round: usize, n_query: usize, ) -> bool { - if let Some(time_limit) = self.time_limit { - if Instant::now().duration_since(start_time) > time_limit { - println!( - "[SortQueryFuzzer] Time limit reached: {} queries ({} random configs each) in {} rounds", - n_round * self.queries_per_round + n_query, - self.config_variations_per_query, - n_round - ); - return true; - } + if let Some(time_limit) = self.time_limit + && Instant::now().duration_since(start_time) > time_limit + { + println!( + "[SortQueryFuzzer] Time limit reached: {} queries ({} random configs each) in {} rounds", + n_round * self.queries_per_round + n_query, + self.config_variations_per_query, + n_round + ); + return true; } false } diff --git a/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs b/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs index 6c1bd316cdd39..16481516e0bed 100644 --- a/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs +++ b/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs @@ -27,18 +27,18 @@ use arrow::{array::StringArray, compute::SortOptions, record_batch::RecordBatch} use arrow_schema::{DataType, Field, Schema}; use datafusion::common::Result; use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionConfig; -use datafusion_execution::memory_pool::units::{KB, MB}; +use datafusion_common::units::{KB, MB}; use datafusion_execution::memory_pool::{ FairSpillPool, MemoryConsumer, MemoryReservation, }; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use 
datafusion_physical_expr::aggregate::AggregateExprBuilder; -use datafusion_physical_expr::expressions::{col, Column}; +use datafusion_physical_expr::expressions::{Column, col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, @@ -80,9 +80,9 @@ async fn test_sort_with_limited_memory() -> Result<()> { let total_spill_files_size = spill_count * record_batch_size; assert!( - total_spill_files_size > pool_size, - "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", - ); + total_spill_files_size > pool_size, + "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", + ); Ok(()) } @@ -126,8 +126,8 @@ async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch() -> } #[tokio::test] -async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation( -) -> Result<()> { +async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -164,8 +164,8 @@ async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_c } #[tokio::test] -async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory( -) -> Result<()> { +async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -356,16 +356,16 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory() -> Result<() let total_spill_files_size = spill_count * record_batch_size; assert!( - total_spill_files_size > pool_size, - "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", - ); + total_spill_files_size > pool_size, + "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", + ); Ok(()) } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -398,8 +398,8 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_ } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -432,8 +432,8 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_ } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -466,8 +466,8 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_ } 
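The spilling tests touched above share one setup idea: the memory pool is capped at 2 * MB while record_batch_size is 8192, so the sort or aggregate is forced to spill, and the assertion then checks spill_count * record_batch_size > pool_size. A rough sketch of that constrained setup, assuming the RuntimeEnvBuilder, FairSpillPool and SessionConfig APIs imported in this diff (the tests themselves build a TaskContext directly; the helper below is illustrative only):

// Sketch only: builds a context whose memory pool is far smaller than the data,
// mirroring `let pool_size = 2 * MB as usize;` used by the tests above.
fn memory_constrained_ctx(pool_size: usize, batch_size: usize) -> SessionContext {
    let runtime = RuntimeEnvBuilder::new()
        .with_memory_pool(Arc::new(FairSpillPool::new(pool_size)))
        .build()
        .unwrap();
    let config = SessionConfig::new().with_batch_size(batch_size);
    // Same constructor as used by repartition_mem_limit.rs later in this diff.
    SessionContext::new_with_config_rt(config, Arc::new(runtime))
}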
#[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_large_record_batch( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_large_record_batch() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 65a41d39d3c54..2ecfcd84aba98 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -18,19 +18,19 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, StringArray}; -use arrow::compute::{concat_batches, SortOptions}; +use arrow::compute::{SortOptions, concat_batches}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; use datafusion::functions_window::row_number::row_number_udwf; +use datafusion::physical_plan::InputOrderMode::{Linear, PartiallySorted, Sorted}; use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::physical_plan::windows::{ - create_window_expr, schema_add_window_field, BoundedWindowAggExec, WindowAggExec, + BoundedWindowAggExec, WindowAggExec, create_window_expr, schema_add_window_field, }; -use datafusion::physical_plan::InputOrderMode::{Linear, PartiallySorted, Sorted}; -use datafusion::physical_plan::{collect, InputOrderMode}; +use datafusion::physical_plan::{InputOrderMode, collect}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::HashMap; use datafusion_common::{Result, ScalarValue}; @@ -445,14 +445,14 @@ fn get_random_function( let fn_name = window_fn_map.keys().collect::>()[rand_fn_idx]; let (window_fn, args) = window_fn_map.values().collect::>()[rand_fn_idx]; let mut args = args.clone(); - if let WindowFunctionDefinition::AggregateUDF(udf) = window_fn { - if !args.is_empty() { - // Do type coercion first argument - let a = args[0].clone(); - let dt = a.return_field(schema.as_ref()).unwrap(); - let coerced = fields_with_aggregate_udf(&[dt], udf).unwrap(); - args[0] = cast(a, schema, coerced[0].data_type().clone()).unwrap(); - } + if let WindowFunctionDefinition::AggregateUDF(udf) = window_fn + && !args.is_empty() + { + // Do type coercion first argument + let a = args[0].clone(); + let dt = a.return_field(schema.as_ref()).unwrap(); + let coerced = fields_with_aggregate_udf(&[dt], udf).unwrap(); + args[0] = cast(a, schema, coerced[0].data_type().clone()).unwrap(); } (window_fn.clone(), args, (*fn_name).to_string()) @@ -569,10 +569,11 @@ fn convert_bound_to_current_row_if_applicable( ) { match bound { WindowFrameBound::Preceding(value) | WindowFrameBound::Following(value) => { - if let Ok(zero) = ScalarValue::new_zero(&value.data_type()) { - if value == &zero && rng.random_range(0..2) == 0 { - *bound = WindowFrameBound::CurrentRow; - } + if let Ok(zero) = ScalarValue::new_zero(&value.data_type()) + && value == &zero + && rng.random_range(0..2) == 0 + { + *bound = WindowFrameBound::CurrentRow; } } _ => {} @@ -644,10 +645,8 @@ async fn run_window_test( ) as _; // Table is ordered according to ORDER BY a, b, c In linear test we use PARTITION BY b, ORDER BY a // For WindowAggExec to produce correct result it need table to be ordered by b,a. Hence add a sort. 
- if is_linear { - if let Some(ordering) = LexOrdering::new(sort_keys) { - exec1 = Arc::new(SortExec::new(ordering, exec1)) as _; - } + if is_linear && let Some(ordering) = LexOrdering::new(sort_keys) { + exec1 = Arc::new(SortExec::new(ordering, exec1)) as _; } let extended_schema = schema_add_window_field(&args, &schema, &window_fn, &fn_name)?; @@ -699,7 +698,9 @@ async fn run_window_test( // BoundedWindowAggExec should produce more chunk than the usual WindowAggExec. // Otherwise it means that we cannot generate result in running mode. - let err_msg = format!("Inconsistent result for window_frame: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, random_seed: {random_seed:?}, search_mode: {search_mode:?}, partition_by_columns:{partition_by_columns:?}, orderby_columns: {orderby_columns:?}"); + let err_msg = format!( + "Inconsistent result for window_frame: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, random_seed: {random_seed:?}, search_mode: {search_mode:?}, partition_by_columns:{partition_by_columns:?}, orderby_columns: {orderby_columns:?}" + ); // Below check makes sure that, streaming execution generates more chunks than the bulk execution. // Since algorithms and operators works on sliding windows in the streaming execution. // However, in the current test setup for some random generated window frame clauses: It is not guaranteed @@ -731,8 +732,12 @@ async fn run_window_test( .enumerate() { if !usual_line.eq(running_line) { - println!("Inconsistent result for window_frame at line:{i:?}: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, pb_cols:{partition_by_columns:?}, ob_cols:{orderby_columns:?}, search_mode:{search_mode:?}"); - println!("--------usual_formatted_sorted----------------running_formatted_sorted--------"); + println!( + "Inconsistent result for window_frame at line:{i:?}: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, pb_cols:{partition_by_columns:?}, ob_cols:{orderby_columns:?}, search_mode:{search_mode:?}" + ); + println!( + "--------usual_formatted_sorted----------------running_formatted_sorted--------" + ); for (line1, line2) in usual_formatted_sorted.iter().zip(running_formatted_sorted) { diff --git a/datafusion/core/tests/macro_hygiene/mod.rs b/datafusion/core/tests/macro_hygiene/mod.rs index c9f33f6fdf0f4..48f0103113cf6 100644 --- a/datafusion/core/tests/macro_hygiene/mod.rs +++ b/datafusion/core/tests/macro_hygiene/mod.rs @@ -85,6 +85,7 @@ mod config_field { impl std::error::Error for E {} #[allow(dead_code)] + #[derive(Default)] struct S; impl std::str::FromStr for S { diff --git a/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs b/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs index 7b157b707a6de..2c9fae20c8606 100644 --- a/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs +++ b/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs @@ -16,16 +16,14 @@ // under the License. 
use datafusion_common_runtime::SpawnedTask; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, System}; -use tokio::time::{interval, Duration}; +use tokio::time::{Duration, interval}; use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_execution::{ - memory_pool::{human_readable_size, FairSpillPool}, - runtime_env::RuntimeEnvBuilder, -}; +use datafusion_common::human_readable_size; +use datafusion_execution::{memory_pool::FairSpillPool, runtime_env::RuntimeEnvBuilder}; /// Measures the maximum RSS (in bytes) during the execution of an async task. RSS /// will be sampled every 7ms. @@ -40,7 +38,7 @@ use datafusion_execution::{ async fn measure_max_rss(f: F) -> (T, usize) where F: FnOnce() -> Fut, - Fut: std::future::Future, + Fut: Future, { // Initialize system information let mut system = System::new_all(); diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 5d8a1d24181cb..c28d23ba0602b 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -39,19 +39,19 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::streaming::PartitionStream; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_catalog::streaming::StreamingTable; use datafusion_catalog::Session; -use datafusion_common::{assert_contains, Result}; +use datafusion_catalog::streaming::StreamingTable; +use datafusion_common::{Result, assert_contains}; +use datafusion_execution::TaskContext; use datafusion_execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; use datafusion_execution::memory_pool::{ FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool, }; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_execution::TaskContext; use datafusion_expr::{Expr, TableType}; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_optimizer::join_selection::JoinSelection; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::join_selection::JoinSelection; use datafusion_physical_plan::collect as collect_batches; use datafusion_physical_plan::common::collect; use datafusion_physical_plan::spill::get_record_batch_memory_size; @@ -604,8 +604,8 @@ async fn test_disk_spill_limit_reached() -> Result<()> { let err = df.collect().await.unwrap_err(); assert_contains!( - err.to_string(), - "The used disk space during the spilling process has exceeded the allowable limit" + err.to_string(), + "The used disk space during the spilling process has exceeded the allowable limit" ); Ok(()) @@ -977,11 +977,13 @@ impl Scenario { descending: false, nulls_first: false, }; - let sort_information = vec![[ - PhysicalSortExpr::new(col("a", &schema).unwrap(), options), - PhysicalSortExpr::new(col("b", &schema).unwrap(), options), - ] - .into()]; + let sort_information = vec![ + [ + PhysicalSortExpr::new(col("a", &schema).unwrap(), options), + PhysicalSortExpr::new(col("b", &schema).unwrap(), options), + ] + .into(), + ]; let table = SortedTableProvider::new(batches, sort_information); Arc::new(table) @@ -1057,7 +1059,7 @@ fn make_dict_batches() -> Vec { let batch_size = 50; let mut i = 0; - let gen = std::iter::from_fn(move || { + let batch_gen = std::iter::from_fn(move || { // 
create values like // 0000000001 // 0000000002 @@ -1080,7 +1082,7 @@ fn make_dict_batches() -> Vec { let num_batches = 5; - let batches: Vec<_> = gen.take(num_batches).collect(); + let batches: Vec<_> = batch_gen.take(num_batches).collect(); batches.iter().enumerate().for_each(|(i, batch)| { println!("Dict batch[{i}] size is: {}", batch.get_array_memory_size()); diff --git a/datafusion/core/tests/memory_limit/repartition_mem_limit.rs b/datafusion/core/tests/memory_limit/repartition_mem_limit.rs index a7af2f01d1cc9..b21bffebaf95e 100644 --- a/datafusion/core/tests/memory_limit/repartition_mem_limit.rs +++ b/datafusion/core/tests/memory_limit/repartition_mem_limit.rs @@ -25,7 +25,7 @@ use datafusion::{ use datafusion_catalog::MemTable; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_physical_plan::{repartition::RepartitionExec, ExecutionPlanProperties}; +use datafusion_physical_plan::{ExecutionPlanProperties, repartition::RepartitionExec}; use futures::TryStreamExt; use itertools::Itertools; @@ -45,11 +45,14 @@ async fn test_repartition_memory_limit() { .with_batch_size(32) .with_target_partitions(2); let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime)); - let batches = vec![RecordBatch::try_from_iter(vec![( - "c1", - Arc::new(Int32Array::from_iter_values((0..10).cycle().take(100_000))) as ArrayRef, - )]) - .unwrap()]; + let batches = vec![ + RecordBatch::try_from_iter(vec![( + "c1", + Arc::new(Int32Array::from_iter_values((0..10).cycle().take(100_000))) + as ArrayRef, + )]) + .unwrap(), + ]; let table = Arc::new(MemTable::try_new(batches[0].schema(), vec![batches]).unwrap()); ctx.register_table("t", table).unwrap(); let plan = ctx diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs index 9b2a5596827d0..6466e9ad96d17 100644 --- a/datafusion/core/tests/optimizer/mod.rs +++ b/datafusion/core/tests/optimizer/mod.rs @@ -27,17 +27,16 @@ use arrow::datatypes::{ DataType, Field, Fields, Schema, SchemaBuilder, SchemaRef, TimeUnit, }; use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::{TransformedResult, TreeNode}; -use datafusion_common::{plan_err, DFSchema, Result, ScalarValue, TableReference}; +use datafusion_common::tree_node::TransformedResult; +use datafusion_common::{DFSchema, Result, ScalarValue, TableReference, plan_err}; use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; use datafusion_expr::{ - col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, - ScalarUDF, TableSource, WindowUDF, + AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, ScalarUDF, + TableSource, WindowUDF, col, lit, }; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_optimizer::analyzer::Analyzer; use datafusion_optimizer::optimizer::Optimizer; -use datafusion_optimizer::simplify_expressions::GuaranteeRewriter; use datafusion_optimizer::{OptimizerConfig, OptimizerContext}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::sqlparser::ast::Statement; @@ -45,6 +44,7 @@ use datafusion_sql::sqlparser::dialect::GenericDialect; use datafusion_sql::sqlparser::parser::Parser; use chrono::DateTime; +use datafusion_expr::expr_rewriter::rewrite_with_guarantees; use datafusion_functions::datetime; #[cfg(test)] @@ -304,8 +304,6 @@ fn test_inequalities_non_null_bounded() { ), ]; - let mut rewriter = GuaranteeRewriter::new(guarantees.iter()); - // 
(original_expr, expected_simplification) let simplified_cases = &[ (col("x").lt(lit(0)), false), @@ -337,7 +335,7 @@ fn test_inequalities_non_null_bounded() { ), ]; - validate_simplified_cases(&mut rewriter, simplified_cases); + validate_simplified_cases(&guarantees, simplified_cases); let unchanged_cases = &[ col("x").gt(lit(2)), @@ -348,16 +346,20 @@ fn test_inequalities_non_null_bounded() { col("x").not_between(lit(3), lit(10)), ]; - validate_unchanged_cases(&mut rewriter, unchanged_cases); + validate_unchanged_cases(&guarantees, unchanged_cases); } -fn validate_simplified_cases(rewriter: &mut GuaranteeRewriter, cases: &[(Expr, T)]) -where +fn validate_simplified_cases( + guarantees: &[(Expr, NullableInterval)], + cases: &[(Expr, T)], +) where ScalarValue: From, T: Clone, { for (expr, expected_value) in cases { - let output = expr.clone().rewrite(rewriter).data().unwrap(); + let output = rewrite_with_guarantees(expr.clone(), guarantees) + .data() + .unwrap(); let expected = lit(ScalarValue::from(expected_value.clone())); assert_eq!( output, expected, @@ -365,9 +367,11 @@ where ); } } -fn validate_unchanged_cases(rewriter: &mut GuaranteeRewriter, cases: &[Expr]) { +fn validate_unchanged_cases(guarantees: &[(Expr, NullableInterval)], cases: &[Expr]) { for expr in cases { - let output = expr.clone().rewrite(rewriter).data().unwrap(); + let output = rewrite_with_guarantees(expr.clone(), guarantees) + .data() + .unwrap(); assert_eq!( &output, expr, "{expr} was simplified to {output}, but expected it to be unchanged" diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index 3a1f06656236c..31ec6efd19510 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -20,7 +20,7 @@ use std::ops::Range; use std::sync::Arc; use std::time::SystemTime; -use arrow::array::{ArrayRef, Int64Array, Int8Array, StringArray}; +use arrow::array::{ArrayRef, Int8Array, Int64Array, StringArray}; use arrow::datatypes::{Field, Schema, SchemaBuilder}; use arrow::record_batch::RecordBatch; use datafusion::datasource::listing::PartitionedFile; @@ -31,8 +31,8 @@ use datafusion::datasource::physical_plan::{ use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::prelude::SessionContext; -use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::Result; +use datafusion_common::test_util::batches_to_sort_string; use bytes::Bytes; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -44,9 +44,9 @@ use insta::assert_snapshot; use object_store::memory::InMemory; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; +use parquet::arrow::ArrowWriter; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::async_reader::AsyncFileReader; -use parquet::arrow::ArrowWriter; use parquet::errors::ParquetError; use parquet::file::metadata::ParquetMetaData; @@ -80,7 +80,7 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { .collect(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(file_schema.clone()) // prepare the scan .with_parquet_file_reader_factory(Arc::new( InMemoryParquetFileReaderFactory(Arc::clone(&in_memory_object_store)), @@ -89,7 +89,6 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { let base_config = FileScanConfigBuilder::new( // just any url that doesn't point to in memory object store 
ObjectStoreUrl::local_filesystem(), - file_schema, source, ) .with_file_group(file_group) diff --git a/datafusion/core/tests/parquet/encryption.rs b/datafusion/core/tests/parquet/encryption.rs index 09b93f06ce85d..8b3170e367457 100644 --- a/datafusion/core/tests/parquet/encryption.rs +++ b/datafusion/core/tests/parquet/encryption.rs @@ -25,11 +25,11 @@ use datafusion::dataframe::DataFrameWriteOptions; use datafusion::datasource::listing::ListingOptions; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion_common::config::{EncryptionFactoryOptions, TableParquetOptions}; -use datafusion_common::{assert_batches_sorted_eq, exec_datafusion_err, DataFusionError}; +use datafusion_common::{DataFusionError, assert_batches_sorted_eq, exec_datafusion_err}; use datafusion_datasource_parquet::ParquetFormat; use datafusion_execution::parquet_encryption::EncryptionFactory; -use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::ArrowWriter; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::encryption::decrypt::FileDecryptionProperties; use parquet::encryption::encrypt::FileEncryptionProperties; use parquet::file::column_crypto_metadata::ColumnCryptoMetaData; @@ -54,6 +54,7 @@ async fn read_parquet_test_data<'a, T: Into>( .unwrap() } +#[expect(clippy::needless_pass_by_value)] pub fn write_batches( path: PathBuf, props: WriterProperties, diff --git a/datafusion/core/tests/parquet/expr_adapter.rs b/datafusion/core/tests/parquet/expr_adapter.rs new file mode 100644 index 0000000000000..515422ed750ef --- /dev/null +++ b/datafusion/core/tests/parquet/expr_adapter.rs @@ -0,0 +1,466 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use arrow::array::{RecordBatch, record_batch}; +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use bytes::{BufMut, BytesMut}; +use datafusion::assert_batches_eq; +use datafusion::common::Result; +use datafusion::datasource::listing::{ + ListingTable, ListingTableConfig, ListingTableConfigExt, +}; +use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_common::DataFusionError; +use datafusion_common::ScalarValue; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_datasource::ListingTableUrl; +use datafusion_execution::object_store::ObjectStoreUrl; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr::expressions::{self, Column}; +use datafusion_physical_expr_adapter::{ + DefaultPhysicalExprAdapter, DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, + PhysicalExprAdapterFactory, +}; +use object_store::{ObjectStore, memory::InMemory, path::Path}; +use parquet::arrow::ArrowWriter; + +async fn write_parquet(batch: RecordBatch, store: Arc, path: &str) { + let mut out = BytesMut::new().writer(); + { + let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None).unwrap(); + writer.write(&batch).unwrap(); + writer.finish().unwrap(); + } + let data = out.into_inner().freeze(); + store.put(&Path::from(path), data.into()).await.unwrap(); +} + +// Implement a custom PhysicalExprAdapterFactory that fills in missing columns with +// the default value for the field type: +// - Int64 columns are filled with `1` +// - Utf8 columns are filled with `'b'` +#[derive(Debug)] +struct CustomPhysicalExprAdapterFactory; + +impl PhysicalExprAdapterFactory for CustomPhysicalExprAdapterFactory { + fn create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + Arc::new(CustomPhysicalExprAdapter { + logical_file_schema: Arc::clone(&logical_file_schema), + physical_file_schema: Arc::clone(&physical_file_schema), + inner: Arc::new(DefaultPhysicalExprAdapter::new( + logical_file_schema, + physical_file_schema, + )), + }) + } +} + +#[derive(Debug, Clone)] +struct CustomPhysicalExprAdapter { + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + inner: Arc, +} + +impl PhysicalExprAdapter for CustomPhysicalExprAdapter { + fn rewrite(&self, mut expr: Arc) -> Result> { + expr = expr + .transform(|expr| { + if let Some(column) = expr.as_any().downcast_ref::() { + let field_name = column.name(); + if self + .physical_file_schema + .field_with_name(field_name) + .ok() + .is_none() + { + let field = self + .logical_file_schema + .field_with_name(field_name) + .map_err(|_| { + DataFusionError::Plan(format!( + "Field '{field_name}' not found in logical file schema", + )) + })?; + // If the field does not exist, create a default value expression + // Note that we use slightly different logic here to create a default value so that we can see different behavior in tests + let default_value = match field.data_type() { + DataType::Int64 => ScalarValue::Int64(Some(1)), + DataType::Utf8 => ScalarValue::Utf8(Some("b".to_string())), + _ => unimplemented!( + "Unsupported data type: {}", + field.data_type() + ), + }; + return Ok(Transformed::yes(Arc::new( + expressions::Literal::new(default_value), + ))); + } + } + + Ok(Transformed::no(expr)) + }) + .data()?; + self.inner.rewrite(expr) + } +} + +#[tokio::test] +async fn test_custom_schema_adapter_and_custom_expression_adapter() { + let batch = + record_batch!(("extra", Int64, [1, 2, 3]), ("c1", Int32, [1, 
2, 3])).unwrap(); + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + let path = "test.parquet"; + write_parquet(batch, store.clone(), path).await; + + let table_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int64, false), + Field::new("c2", DataType::Utf8, true), + ])); + + let mut cfg = SessionConfig::new() + // Disable statistics collection for this test otherwise early pruning makes it hard to demonstrate data adaptation + .with_collect_statistics(false) + .with_parquet_pruning(false) + .with_parquet_page_index_pruning(false); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); + assert!( + !ctx.state() + .config_mut() + .options_mut() + .execution + .collect_statistics + ); + assert!(!ctx.state().config().collect_statistics()); + + // Test with DefaultPhysicalExprAdapterFactory - missing columns are filled with NULL + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_expr_adapter_factory(Arc::new(DefaultPhysicalExprAdapterFactory)); + + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + + let batches = ctx + .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 IS NULL") + .await + .unwrap() + .collect() + .await + .unwrap(); + + let expected = [ + "+----+----+", + "| c2 | c1 |", + "+----+----+", + "| | 2 |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // Test with a custom physical expr adapter + // PhysicalExprAdapterFactory now handles both predicates AND projections + // CustomPhysicalExprAdapterFactory fills missing columns with 'b' for Utf8 + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_expr_adapter_factory(Arc::new(CustomPhysicalExprAdapterFactory)); + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.deregister_table("t").unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + let batches = ctx + .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'b'") + .await + .unwrap() + .collect() + .await + .unwrap(); + // With CustomPhysicalExprAdapterFactory, missing column c2 is filled with 'b' + // in both the predicate (c2 = 'b' becomes 'b' = 'b' -> true) and the projection + let expected = [ + "+----+----+", + "| c2 | c1 |", + "+----+----+", + "| b | 2 |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); +} + +/// Test demonstrating how to implement a custom PhysicalExprAdapterFactory +/// that fills missing columns with non-null default values. +/// +/// PhysicalExprAdapterFactory rewrites expressions to use literals for +/// missing columns, handling schema evolution efficiently at planning time. 
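// The doc comment above describes the rewrite mechanism; below is a minimal sketch of
// that step in isolation, using only the factory/adapter APIs exercised by the tests in
// this file. The function name is illustrative, and the typed-NULL result is an
// assumption consistent with the DefaultPhysicalExprAdapterFactory test above, not a
// statement of the exact output shape.
fn rewrite_missing_column_sketch() -> datafusion_common::Result<()> {
    use std::sync::Arc;
    use arrow_schema::{DataType, Field, Schema};
    use datafusion_physical_expr::expressions::Column;
    use datafusion_physical_expr_adapter::{
        DefaultPhysicalExprAdapterFactory, PhysicalExprAdapterFactory,
    };

    // Logical (table) schema has c1 and c2; the physical (file) schema only has c1.
    let logical = Arc::new(Schema::new(vec![
        Field::new("c1", DataType::Int64, false),
        Field::new("c2", DataType::Utf8, true),
    ]));
    let physical =
        Arc::new(Schema::new(vec![Field::new("c1", DataType::Int64, false)]));

    let adapter = DefaultPhysicalExprAdapterFactory.create(logical, physical);

    // `c2` is column index 1 in the logical schema but absent from the file, so the
    // default adapter replaces the column reference with a literal (NULL of the
    // logical type); any predicate built on top of it, e.g. `c2 IS NULL`, is then
    // evaluated against that literal instead of a non-existent file column.
    let rewritten = adapter.rewrite(Arc::new(Column::new("c2", 1)))?;
    println!("rewritten: {rewritten}");
    Ok(())
}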
+#[tokio::test] +async fn test_physical_expr_adapter_with_non_null_defaults() { + // File only has c1 column + let batch = record_batch!(("c1", Int32, [10, 20, 30])).unwrap(); + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + write_parquet(batch, store.clone(), "defaults_test.parquet").await; + + // Table schema has additional columns c2 (Utf8) and c3 (Int64) that don't exist in file + let table_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int64, false), // type differs from file (Int32 vs Int64) + Field::new("c2", DataType::Utf8, true), // missing from file + Field::new("c3", DataType::Int64, true), // missing from file + ])); + + let mut cfg = SessionConfig::new() + .with_collect_statistics(false) + .with_parquet_pruning(false); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); + + // CustomPhysicalExprAdapterFactory fills: + // - missing Utf8 columns with 'b' + // - missing Int64 columns with 1 + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_expr_adapter_factory(Arc::new(CustomPhysicalExprAdapterFactory)); + + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + + // Query all columns - missing columns should have default values + let batches = ctx + .sql("SELECT c1, c2, c3 FROM t ORDER BY c1") + .await + .unwrap() + .collect() + .await + .unwrap(); + + // c1 is cast from Int32 to Int64, c2 defaults to 'b', c3 defaults to 1 + let expected = [ + "+----+----+----+", + "| c1 | c2 | c3 |", + "+----+----+----+", + "| 10 | b | 1 |", + "| 20 | b | 1 |", + "| 30 | b | 1 |", + "+----+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // Verify predicates work with default values + // c3 = 1 should match all rows since default is 1 + let batches = ctx + .sql("SELECT c1 FROM t WHERE c3 = 1 ORDER BY c1") + .await + .unwrap() + .collect() + .await + .unwrap(); + + #[rustfmt::skip] + let expected = [ + "+----+", + "| c1 |", + "+----+", + "| 10 |", + "| 20 |", + "| 30 |", + "+----+", + ]; + assert_batches_eq!(expected, &batches); + + // c3 = 999 should match no rows + let batches = ctx + .sql("SELECT c1 FROM t WHERE c3 = 999") + .await + .unwrap() + .collect() + .await + .unwrap(); + + #[rustfmt::skip] + let expected = [ + "++", + "++", + ]; + assert_batches_eq!(expected, &batches); +} + +/// Test demonstrating that a single PhysicalExprAdapterFactory instance can be +/// reused across multiple ListingTable instances. +/// +/// This addresses the concern: "This is important for ListingTable. A test for +/// ListingTable would add assurance that the functionality is retained [i.e. 
we +/// can re-use a PhysicalExprAdapterFactory]" +#[tokio::test] +async fn test_physical_expr_adapter_factory_reuse_across_tables() { + // Create two different parquet files with different schemas + // File 1: has column c1 only + let batch1 = record_batch!(("c1", Int32, [1, 2, 3])).unwrap(); + // File 2: has column c1 only but different data + let batch2 = record_batch!(("c1", Int32, [10, 20, 30])).unwrap(); + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + + // Write files to different paths + write_parquet(batch1, store.clone(), "table1/data.parquet").await; + write_parquet(batch2, store.clone(), "table2/data.parquet").await; + + // Table schema has additional columns that don't exist in files + let table_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int64, false), + Field::new("c2", DataType::Utf8, true), // missing from files + ])); + + let mut cfg = SessionConfig::new() + .with_collect_statistics(false) + .with_parquet_pruning(false); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); + + // Create ONE factory instance wrapped in Arc - this will be REUSED + let factory: Arc = + Arc::new(CustomPhysicalExprAdapterFactory); + + // Create ListingTable 1 using the shared factory + let listing_table_config1 = + ListingTableConfig::new(ListingTableUrl::parse("memory:///table1/").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_expr_adapter_factory(Arc::clone(&factory)); // Clone the Arc, not create new factory + + let table1 = ListingTable::try_new(listing_table_config1).unwrap(); + ctx.register_table("t1", Arc::new(table1)).unwrap(); + + // Create ListingTable 2 using the SAME factory instance + let listing_table_config2 = + ListingTableConfig::new(ListingTableUrl::parse("memory:///table2/").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_expr_adapter_factory(Arc::clone(&factory)); // Reuse same factory + + let table2 = ListingTable::try_new(listing_table_config2).unwrap(); + ctx.register_table("t2", Arc::new(table2)).unwrap(); + + // Verify table 1 works correctly with the shared factory + // CustomPhysicalExprAdapterFactory fills missing Utf8 columns with 'b' + let batches = ctx + .sql("SELECT c1, c2 FROM t1 ORDER BY c1") + .await + .unwrap() + .collect() + .await + .unwrap(); + + let expected = [ + "+----+----+", + "| c1 | c2 |", + "+----+----+", + "| 1 | b |", + "| 2 | b |", + "| 3 | b |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // Verify table 2 also works correctly with the SAME shared factory + let batches = ctx + .sql("SELECT c1, c2 FROM t2 ORDER BY c1") + .await + .unwrap() + .collect() + .await + .unwrap(); + + let expected = [ + "+----+----+", + "| c1 | c2 |", + "+----+----+", + "| 10 | b |", + "| 20 | b |", + "| 30 | b |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // Verify predicates work on both tables with the shared factory + let batches = ctx + .sql("SELECT c1 FROM t1 WHERE c2 = 'b' ORDER BY c1") + .await + .unwrap() + .collect() + .await + .unwrap(); + + #[rustfmt::skip] + let expected = [ + "+----+", + "| c1 |", + "+----+", + "| 1 |", + "| 2 |", + "| 3 |", + "+----+", + ]; + assert_batches_eq!(expected, &batches); + + let batches = ctx + .sql("SELECT c1 FROM t2 WHERE c2 = 'b' ORDER BY c1") + 
.await + .unwrap() + .collect() + .await + .unwrap(); + + #[rustfmt::skip] + let expected = [ + "+----+", + "| c1 |", + "+----+", + "| 10 |", + "| 20 |", + "| 30 |", + "+----+", + ]; + assert_batches_eq!(expected, &batches); +} diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 5135f956852c3..0c02c8fe523dc 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -21,7 +21,7 @@ use std::path::Path; use std::sync::Arc; use crate::parquet::utils::MetricsFinder; -use crate::parquet::{create_data_batch, Scenario}; +use crate::parquet::{Scenario, create_data_batch}; use arrow::datatypes::SchemaRef; use arrow::util::pretty::pretty_format_batches; @@ -29,17 +29,17 @@ use datafusion::common::Result; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::ParquetSource; use datafusion::prelude::SessionContext; -use datafusion_common::{assert_contains, DFSchema}; +use datafusion_common::{DFSchema, assert_contains}; use datafusion_datasource_parquet::{ParquetAccessPlan, RowGroupAccess}; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_expr::{col, lit, Expr}; -use datafusion_physical_plan::metrics::{MetricValue, MetricsSet}; +use datafusion_expr::{Expr, col, lit}; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::metrics::{MetricValue, MetricsSet}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; -use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::arrow::ArrowWriter; +use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::properties::WriterProperties; use tempfile::NamedTempFile; @@ -257,7 +257,10 @@ async fn bad_selection() { .await .unwrap_err(); let err_string = err.to_string(); - assert_contains!(&err_string, "Internal error: Invalid ParquetAccessPlan Selection. Row group 0 has 5 rows but selection only specifies 4 rows"); + assert_contains!( + &err_string, + "Row group 0 has 5 rows but selection only specifies 4 rows." 
+ ); } /// Return a RowSelection of 1 rows from a row group of 5 rows @@ -355,11 +358,11 @@ impl TestFull { let source = if let Some(predicate) = predicate { let df_schema = DFSchema::try_from(schema.clone())?; let predicate = ctx.create_physical_expr(predicate, &df_schema)?; - Arc::new(ParquetSource::default().with_predicate(predicate)) + Arc::new(ParquetSource::new(schema.clone()).with_predicate(predicate)) } else { - Arc::new(ParquetSource::default()) + Arc::new(ParquetSource::new(schema.clone())) }; - let config = FileScanConfigBuilder::new(object_store_url, schema.clone(), source) + let config = FileScanConfigBuilder::new(object_store_url, source) .with_file(partitioned_file) .build(); diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 64ee92eda2545..fdefdafa00aa4 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -18,31 +18,30 @@ use std::fs; use std::sync::Arc; +use datafusion::datasource::TableProvider; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; use datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::prelude::SessionContext; -use datafusion_common::stats::Precision; use datafusion_common::DFSchema; +use datafusion_common::stats::Precision; +use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; -use datafusion_execution::cache::cache_unit::{ - DefaultFileStatisticsCache, DefaultListFilesCache, -}; +use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_expr::{col, lit, Expr}; +use datafusion_expr::{Expr, col, lit}; use datafusion::datasource::physical_plan::FileScanConfig; use datafusion_common::config::ConfigOptions; -use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; -use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::filter::FilterExec; use tempfile::tempdir; #[tokio::test] @@ -127,8 +126,9 @@ async fn load_table_stats_with_session_level_cache() { ); assert_eq!( exec1.partition_statistics(None).unwrap().total_byte_size, - // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 - Precision::Exact(671), + // Byte size is absent because we cannot estimate the output size + // of the Arrow data since there are variable length columns. 
+ Precision::Absent, ); assert_eq!(get_static_cache_size(&state1), 1); @@ -142,8 +142,8 @@ async fn load_table_stats_with_session_level_cache() { ); assert_eq!( exec2.partition_statistics(None).unwrap().total_byte_size, - // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 - Precision::Exact(671), + // Absent because the data contains variable length columns + Precision::Absent, ); assert_eq!(get_static_cache_size(&state2), 1); @@ -157,8 +157,8 @@ async fn load_table_stats_with_session_level_cache() { ); assert_eq!( exec3.partition_statistics(None).unwrap().total_byte_size, - // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 - Precision::Exact(671), + // Absent because the data contains variable length columns + Precision::Absent, ); // List same file no increase assert_eq!(get_static_cache_size(&state1), 1); diff --git a/datafusion/core/tests/parquet/filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs index 966f251613979..e3a191ee9ade2 100644 --- a/datafusion/core/tests/parquet/filter_pushdown.rs +++ b/datafusion/core/tests/parquet/filter_pushdown.rs @@ -31,7 +31,7 @@ use arrow::record_batch::RecordBatch; use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::{MetricValue, MetricsSet}; use datafusion::prelude::{ - col, lit, lit_timestamp_nano, Expr, ParquetReadOptions, SessionContext, + Expr, ParquetReadOptions, SessionContext, col, lit, lit_timestamp_nano, }; use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile}; use datafusion_expr::utils::{conjunction, disjunction, split_conjunction}; @@ -636,6 +636,27 @@ async fn predicate_cache_pushdown_default() -> datafusion_common::Result<()> { config.options_mut().execution.parquet.pushdown_filters = true; let ctx = SessionContext::new_with_config(config); // The cache is on by default, and used when filter pushdown is enabled + PredicateCacheTest { + expected_inner_records: 8, + expected_records: 7, // reads more than necessary from the cache as then another bitmap is applied + } + .run(&ctx) + .await +} + +#[tokio::test] +async fn predicate_cache_pushdown_default_selections_only() +-> datafusion_common::Result<()> { + let mut config = SessionConfig::new(); + config.options_mut().execution.parquet.pushdown_filters = true; + // forcing filter selections minimizes the number of rows read from the cache + config + .options_mut() + .execution + .parquet + .force_filter_selections = true; + let ctx = SessionContext::new_with_config(config); + // The cache is on by default, and used when filter pushdown is enabled PredicateCacheTest { expected_inner_records: 8, expected_records: 4, diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 097600e45eadd..35b5918d9e8bf 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -19,12 +19,12 @@ use crate::parquet::utils::MetricsFinder; use arrow::{ array::{ - make_array, Array, ArrayRef, BinaryArray, Date32Array, Date64Array, - Decimal128Array, DictionaryArray, FixedSizeBinaryArray, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, - StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + Array, ArrayRef, BinaryArray, Date32Array, Date64Array, Decimal128Array, + DictionaryArray, FixedSizeBinaryArray, Float64Array, Int8Array, Int16Array, + Int32Array, 
Int64Array, LargeBinaryArray, LargeStringArray, StringArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, + make_array, }, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, @@ -32,7 +32,7 @@ use arrow::{ }; use chrono::{Datelike, Duration, TimeDelta}; use datafusion::{ - datasource::{provider_as_source, TableProvider}, + datasource::{TableProvider, provider_as_source}, physical_plan::metrics::MetricsSet, prelude::{ParquetReadOptions, SessionConfig, SessionContext}, }; @@ -46,13 +46,13 @@ use tempfile::NamedTempFile; mod custom_reader; #[cfg(feature = "parquet_encryption")] mod encryption; +mod expr_adapter; mod external_access_plan; mod file_statistics; mod filter_pushdown; mod page_pruning; mod row_group_pruning; mod schema; -mod schema_adapter; mod schema_coercion; mod utils; @@ -147,15 +147,14 @@ impl TestOutput { for metric in self.parquet_metrics.iter() { let metric = metric.as_ref(); - if metric.value().name() == metric_name { - if let MetricValue::PruningMetrics { + if metric.value().name() == metric_name + && let MetricValue::PruningMetrics { pruning_metrics, .. } = metric.value() - { - total_pruned += pruning_metrics.pruned(); - total_matched += pruning_metrics.matched(); - found = true; - } + { + total_pruned += pruning_metrics.pruned(); + total_matched += pruning_metrics.matched(); + found = true; } } @@ -652,6 +651,7 @@ fn make_date_batch(offset: Duration) -> RecordBatch { /// of the column. It is *not* a table named service.name /// /// name | service.name +#[expect(clippy::needless_pass_by_value)] fn make_bytearray_batch( name: &str, string_values: Vec<&str>, @@ -707,6 +707,7 @@ fn make_bytearray_batch( /// of the column. 
It is *not* a table named service.name /// /// name | service.name +#[expect(clippy::needless_pass_by_value)] fn make_names_batch(name: &str, service_name_values: Vec<&str>) -> RecordBatch { let num_rows = service_name_values.len(); let name: StringArray = std::iter::repeat_n(Some(name), num_rows).collect(); @@ -791,6 +792,7 @@ fn make_utf8_batch(value: Vec>) -> RecordBatch { .unwrap() } +#[expect(clippy::needless_pass_by_value)] fn make_dictionary_batch(strings: Vec<&str>, integers: Vec) -> RecordBatch { let keys = Int32Array::from_iter(0..strings.len() as i32); let small_keys = Int16Array::from_iter(0..strings.len() as i16); @@ -839,6 +841,7 @@ fn make_dictionary_batch(strings: Vec<&str>, integers: Vec) -> RecordBatch .unwrap() } +#[expect(clippy::needless_pass_by_value)] fn create_data_batch(scenario: Scenario) -> Vec { match scenario { Scenario::Timestamps => { diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 27bee10234b57..17392974b63a8 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -21,25 +21,25 @@ use crate::parquet::Unit::Page; use crate::parquet::{ContextWithParquet, Scenario}; use arrow::array::RecordBatch; -use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::ParquetSource; use datafusion::datasource::source::DataSourceExec; use datafusion::execution::context::SessionState; -use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::metrics::MetricValue; use datafusion::prelude::SessionContext; use datafusion_common::{ScalarValue, ToDFSchema}; use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::{col, lit, Expr}; +use datafusion_expr::{Expr, col, lit}; use datafusion_physical_expr::create_physical_expr; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use futures::StreamExt; -use object_store::path::Path; use object_store::ObjectMeta; +use object_store::path::Path; async fn get_parquet_exec( state: &SessionState, @@ -81,12 +81,12 @@ async fn get_parquet_exec( let predicate = create_physical_expr(&filter, &df_schema, &execution_props).unwrap(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(schema.clone()) .with_predicate(predicate) .with_enable_page_index(true) .with_pushdown_filters(pushdown_filters), ); - let base_config = FileScanConfigBuilder::new(object_store_url, schema, source) + let base_config = FileScanConfigBuilder::new(object_store_url, source) .with_file(partitioned_file) .build(); diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs deleted file mode 100644 index 40fc6176e212b..0000000000000 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ /dev/null @@ -1,553 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use arrow::array::{record_batch, RecordBatch, RecordBatchOptions}; -use arrow::compute::{cast_with_options, CastOptions}; -use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef}; -use bytes::{BufMut, BytesMut}; -use datafusion::assert_batches_eq; -use datafusion::common::Result; -use datafusion::datasource::listing::{ - ListingTable, ListingTableConfig, ListingTableConfigExt, -}; -use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::DataFusionError; -use datafusion_common::{ColumnStatistics, ScalarValue}; -use datafusion_datasource::file::FileSource; -use datafusion_datasource::file_scan_config::FileScanConfigBuilder; -use datafusion_datasource::schema_adapter::{ - DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, SchemaMapper, -}; -use datafusion_datasource::ListingTableUrl; -use datafusion_datasource_parquet::source::ParquetSource; -use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_expr::expressions::{self, Column}; -use datafusion_physical_expr::PhysicalExpr; -use datafusion_physical_expr_adapter::{ - DefaultPhysicalExprAdapter, DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, - PhysicalExprAdapterFactory, -}; -use itertools::Itertools; -use object_store::{memory::InMemory, path::Path, ObjectStore}; -use parquet::arrow::ArrowWriter; - -async fn write_parquet(batch: RecordBatch, store: Arc, path: &str) { - let mut out = BytesMut::new().writer(); - { - let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None).unwrap(); - writer.write(&batch).unwrap(); - writer.finish().unwrap(); - } - let data = out.into_inner().freeze(); - store.put(&Path::from(path), data.into()).await.unwrap(); -} - -#[derive(Debug)] -struct CustomSchemaAdapterFactory; - -impl SchemaAdapterFactory for CustomSchemaAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(CustomSchemaAdapter { - logical_file_schema: projected_table_schema, - }) - } -} - -#[derive(Debug)] -struct CustomSchemaAdapter { - logical_file_schema: SchemaRef, -} - -impl SchemaAdapter for CustomSchemaAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - for (idx, field) in file_schema.fields().iter().enumerate() { - if field.name() == self.logical_file_schema.field(index).name() { - return Some(idx); - } - } - None - } - - fn map_schema( - &self, - file_schema: &Schema, - ) -> Result<(Arc, Vec)> { - let projection = (0..file_schema.fields().len()).collect_vec(); - Ok(( - Arc::new(CustomSchemaMapper { - logical_file_schema: Arc::clone(&self.logical_file_schema), - }), - projection, - )) - } -} - -#[derive(Debug)] -struct CustomSchemaMapper { - logical_file_schema: SchemaRef, -} - -impl SchemaMapper for CustomSchemaMapper { - fn map_batch(&self, batch: RecordBatch) -> Result { - let mut output_columns = - Vec::with_capacity(self.logical_file_schema.fields().len()); - for field in self.logical_file_schema.fields() { - if let 
Some(array) = batch.column_by_name(field.name()) { - output_columns.push(cast_with_options( - array, - field.data_type(), - &CastOptions::default(), - )?); - } else { - // Create a new array with the default value for the field type - let default_value = match field.data_type() { - DataType::Int64 => ScalarValue::Int64(Some(0)), - DataType::Utf8 => ScalarValue::Utf8(Some("a".to_string())), - _ => unimplemented!("Unsupported data type: {}", field.data_type()), - }; - output_columns - .push(default_value.to_array_of_size(batch.num_rows()).unwrap()); - } - } - let batch = RecordBatch::try_new_with_options( - Arc::clone(&self.logical_file_schema), - output_columns, - &RecordBatchOptions::new().with_row_count(Some(batch.num_rows())), - ) - .unwrap(); - Ok(batch) - } - - fn map_column_statistics( - &self, - _file_col_statistics: &[ColumnStatistics], - ) -> Result> { - Ok(vec![ - ColumnStatistics::new_unknown(); - self.logical_file_schema.fields().len() - ]) - } -} - -// Implement a custom PhysicalExprAdapterFactory that fills in missing columns with the default value for the field type -#[derive(Debug)] -struct CustomPhysicalExprAdapterFactory; - -impl PhysicalExprAdapterFactory for CustomPhysicalExprAdapterFactory { - fn create( - &self, - logical_file_schema: SchemaRef, - physical_file_schema: SchemaRef, - ) -> Arc { - Arc::new(CustomPhysicalExprAdapter { - logical_file_schema: Arc::clone(&logical_file_schema), - physical_file_schema: Arc::clone(&physical_file_schema), - inner: Arc::new(DefaultPhysicalExprAdapter::new( - logical_file_schema, - physical_file_schema, - )), - }) - } -} - -#[derive(Debug, Clone)] -struct CustomPhysicalExprAdapter { - logical_file_schema: SchemaRef, - physical_file_schema: SchemaRef, - inner: Arc, -} - -impl PhysicalExprAdapter for CustomPhysicalExprAdapter { - fn rewrite(&self, mut expr: Arc) -> Result> { - expr = expr - .transform(|expr| { - if let Some(column) = expr.as_any().downcast_ref::() { - let field_name = column.name(); - if self - .physical_file_schema - .field_with_name(field_name) - .ok() - .is_none() - { - let field = self - .logical_file_schema - .field_with_name(field_name) - .map_err(|_| { - DataFusionError::Plan(format!( - "Field '{field_name}' not found in logical file schema", - )) - })?; - // If the field does not exist, create a default value expression - // Note that we use slightly different logic here to create a default value so that we can see different behavior in tests - let default_value = match field.data_type() { - DataType::Int64 => ScalarValue::Int64(Some(1)), - DataType::Utf8 => ScalarValue::Utf8(Some("b".to_string())), - _ => unimplemented!( - "Unsupported data type: {}", - field.data_type() - ), - }; - return Ok(Transformed::yes(Arc::new( - expressions::Literal::new(default_value), - ))); - } - } - - Ok(Transformed::no(expr)) - }) - .data()?; - self.inner.rewrite(expr) - } - - fn with_partition_values( - &self, - partition_values: Vec<(FieldRef, ScalarValue)>, - ) -> Arc { - assert!( - partition_values.is_empty(), - "Partition values are not supported in this test" - ); - Arc::new(self.clone()) - } -} - -#[tokio::test] -async fn test_custom_schema_adapter_and_custom_expression_adapter() { - let batch = - record_batch!(("extra", Int64, [1, 2, 3]), ("c1", Int32, [1, 2, 3])).unwrap(); - - let store = Arc::new(InMemory::new()) as Arc; - let store_url = ObjectStoreUrl::parse("memory://").unwrap(); - let path = "test.parquet"; - write_parquet(batch, store.clone(), path).await; - - let table_schema = Arc::new(Schema::new(vec![ - 
Field::new("c1", DataType::Int64, false), - Field::new("c2", DataType::Utf8, true), - ])); - - let mut cfg = SessionConfig::new() - // Disable statistics collection for this test otherwise early pruning makes it hard to demonstrate data adaptation - .with_collect_statistics(false) - .with_parquet_pruning(false) - .with_parquet_page_index_pruning(false); - cfg.options_mut().execution.parquet.pushdown_filters = true; - let ctx = SessionContext::new_with_config(cfg); - ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); - assert!( - !ctx.state() - .config_mut() - .options_mut() - .execution - .collect_statistics - ); - assert!(!ctx.state().config().collect_statistics()); - - let listing_table_config = - ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) - .infer_options(&ctx.state()) - .await - .unwrap() - .with_schema(table_schema.clone()) - .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)) - .with_expr_adapter_factory(Arc::new(DefaultPhysicalExprAdapterFactory)); - - let table = ListingTable::try_new(listing_table_config).unwrap(); - ctx.register_table("t", Arc::new(table)).unwrap(); - - let batches = ctx - .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 IS NULL") - .await - .unwrap() - .collect() - .await - .unwrap(); - - let expected = [ - "+----+----+", - "| c2 | c1 |", - "+----+----+", - "| | 2 |", - "+----+----+", - ]; - assert_batches_eq!(expected, &batches); - - // Test using a custom schema adapter and no explicit physical expr adapter - // This should use the custom schema adapter both for projections and predicate pushdown - let listing_table_config = - ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) - .infer_options(&ctx.state()) - .await - .unwrap() - .with_schema(table_schema.clone()) - .with_schema_adapter_factory(Arc::new(CustomSchemaAdapterFactory)); - let table = ListingTable::try_new(listing_table_config).unwrap(); - ctx.deregister_table("t").unwrap(); - ctx.register_table("t", Arc::new(table)).unwrap(); - let batches = ctx - .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'a'") - .await - .unwrap() - .collect() - .await - .unwrap(); - let expected = [ - "+----+----+", - "| c2 | c1 |", - "+----+----+", - "| a | 2 |", - "+----+----+", - ]; - assert_batches_eq!(expected, &batches); - - // Do the same test but with a custom physical expr adapter - // Now the default schema adapter will be used for projections, but the custom physical expr adapter will be used for predicate pushdown - let listing_table_config = - ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) - .infer_options(&ctx.state()) - .await - .unwrap() - .with_schema(table_schema.clone()) - .with_expr_adapter_factory(Arc::new(CustomPhysicalExprAdapterFactory)); - let table = ListingTable::try_new(listing_table_config).unwrap(); - ctx.deregister_table("t").unwrap(); - ctx.register_table("t", Arc::new(table)).unwrap(); - let batches = ctx - .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'b'") - .await - .unwrap() - .collect() - .await - .unwrap(); - let expected = [ - "+----+----+", - "| c2 | c1 |", - "+----+----+", - "| | 2 |", - "+----+----+", - ]; - assert_batches_eq!(expected, &batches); - - // If we use both then the custom physical expr adapter will be used for predicate pushdown and the custom schema adapter will be used for projections - let listing_table_config = - ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) - .infer_options(&ctx.state()) - .await - .unwrap() - 
.with_schema(table_schema.clone()) - .with_schema_adapter_factory(Arc::new(CustomSchemaAdapterFactory)) - .with_expr_adapter_factory(Arc::new(CustomPhysicalExprAdapterFactory)); - let table = ListingTable::try_new(listing_table_config).unwrap(); - ctx.deregister_table("t").unwrap(); - ctx.register_table("t", Arc::new(table)).unwrap(); - let batches = ctx - .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'b'") - .await - .unwrap() - .collect() - .await - .unwrap(); - let expected = [ - "+----+----+", - "| c2 | c1 |", - "+----+----+", - "| a | 2 |", - "+----+----+", - ]; - assert_batches_eq!(expected, &batches); -} - -/// A test schema adapter factory that adds prefix to column names -#[derive(Debug)] -struct PrefixAdapterFactory { - prefix: String, -} - -impl SchemaAdapterFactory for PrefixAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(PrefixAdapter { - input_schema: projected_table_schema, - prefix: self.prefix.clone(), - }) - } -} - -/// A test schema adapter that adds prefix to column names -#[derive(Debug)] -struct PrefixAdapter { - input_schema: SchemaRef, - prefix: String, -} - -impl SchemaAdapter for PrefixAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.input_schema.field(index); - file_schema.fields.find(field.name()).map(|(i, _)| i) - } - - fn map_schema( - &self, - file_schema: &Schema, - ) -> Result<(Arc, Vec)> { - let mut projection = Vec::with_capacity(file_schema.fields().len()); - for (file_idx, file_field) in file_schema.fields().iter().enumerate() { - if self.input_schema.fields().find(file_field.name()).is_some() { - projection.push(file_idx); - } - } - - // Create a schema mapper that adds a prefix to column names - #[derive(Debug)] - struct PrefixSchemaMapping { - // Keep only the prefix field which is actually used in the implementation - prefix: String, - } - - impl SchemaMapper for PrefixSchemaMapping { - fn map_batch(&self, batch: RecordBatch) -> Result { - // Create a new schema with prefixed field names - let prefixed_fields: Vec = batch - .schema() - .fields() - .iter() - .map(|field| { - Field::new( - format!("{}{}", self.prefix, field.name()), - field.data_type().clone(), - field.is_nullable(), - ) - }) - .collect(); - let prefixed_schema = Arc::new(Schema::new(prefixed_fields)); - - // Create a new batch with the prefixed schema but the same data - let options = RecordBatchOptions::default(); - RecordBatch::try_new_with_options( - prefixed_schema, - batch.columns().to_vec(), - &options, - ) - .map_err(|e| DataFusionError::ArrowError(Box::new(e), None)) - } - - fn map_column_statistics( - &self, - stats: &[ColumnStatistics], - ) -> Result> { - // For testing, just return the input statistics - Ok(stats.to_vec()) - } - } - - Ok(( - Arc::new(PrefixSchemaMapping { - prefix: self.prefix.clone(), - }), - projection, - )) - } -} - -#[test] -fn test_apply_schema_adapter_with_factory() { - // Create a schema - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])); - - // Create a parquet source - let source = ParquetSource::default(); - - // Create a file scan config with source that has a schema adapter factory - let factory = Arc::new(PrefixAdapterFactory { - prefix: "test_".to_string(), - }); - - let file_source = source.clone().with_schema_adapter_factory(factory).unwrap(); - - let config = FileScanConfigBuilder::new( - 
ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source, - ) - .build(); - - // Apply schema adapter to a new source - let result_source = source.apply_schema_adapter(&config).unwrap(); - - // Verify the adapter was applied - assert!(result_source.schema_adapter_factory().is_some()); - - // Create adapter and test it produces expected schema - let adapter_factory = result_source.schema_adapter_factory().unwrap(); - let adapter = adapter_factory.create(schema.clone(), schema.clone()); - - // Create a dummy batch to test the schema mapping - let dummy_batch = RecordBatch::new_empty(schema.clone()); - - // Get the file schema (which is the same as the table schema in this test) - let (mapper, _) = adapter.map_schema(&schema).unwrap(); - - // Apply the mapping to get the output schema - let mapped_batch = mapper.map_batch(dummy_batch).unwrap(); - let output_schema = mapped_batch.schema(); - - // Check the column names have the prefix - assert_eq!(output_schema.field(0).name(), "test_id"); - assert_eq!(output_schema.field(1).name(), "test_name"); -} - -#[test] -fn test_apply_schema_adapter_without_factory() { - // Create a schema - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])); - - // Create a parquet source - let source = ParquetSource::default(); - - // Convert to Arc - let file_source: Arc = Arc::new(source.clone()); - - // Create a file scan config without a schema adapter factory - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source, - ) - .build(); - - // Apply schema adapter function - should pass through the source unchanged - let result_source = source.apply_schema_adapter(&config).unwrap(); - - // Verify no adapter was applied - assert!(result_source.schema_adapter_factory().is_none()); -} diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index 9be391a9108e6..6f7e2e328d0c3 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -18,16 +18,16 @@ use std::sync::Arc; use arrow::array::{ - types::Int32Type, ArrayRef, DictionaryArray, Float32Array, Int64Array, RecordBatch, - StringArray, + ArrayRef, DictionaryArray, Float32Array, Int64Array, RecordBatch, StringArray, + types::Int32Type, }; use arrow::datatypes::{DataType, Field, Schema}; use datafusion::datasource::physical_plan::ParquetSource; use datafusion::physical_plan::collect; use datafusion::prelude::SessionContext; use datafusion::test::object_store::local_unpartitioned_file; -use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::Result; +use datafusion_common::test_util::batches_to_sort_string; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -62,14 +62,10 @@ async fn multi_parquet_coercion() { Field::new("c2", DataType::Int32, true), Field::new("c3", DataType::Float64, true), ])); - let source = Arc::new(ParquetSource::default()); - let conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file_group(file_group) - .build(); + let source = Arc::new(ParquetSource::new(file_schema.clone())); + let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file_group(file_group) + .build(); let parquet_exec = DataSourceExec::from_data_source(conf); @@ -122,11 +118,11 
@@ async fn multi_parquet_coercion_projection() { ])); let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file_group(file_group) .with_projection_indices(Some(vec![1, 0, 2])) + .unwrap() .build(); let parquet_exec = DataSourceExec::from_data_source(config); diff --git a/datafusion/core/tests/parquet/utils.rs b/datafusion/core/tests/parquet/utils.rs index 24b6cadc148f8..e5e0026ec1f16 100644 --- a/datafusion/core/tests/parquet/utils.rs +++ b/datafusion/core/tests/parquet/utils.rs @@ -20,7 +20,7 @@ use datafusion::datasource::physical_plan::ParquetSource; use datafusion::datasource::source::DataSourceExec; use datafusion_physical_plan::metrics::MetricsSet; -use datafusion_physical_plan::{accept, ExecutionPlan, ExecutionPlanVisitor}; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanVisitor, accept}; /// Find the metrics from the first DataSourceExec encountered in the plan #[derive(Debug)] @@ -47,13 +47,12 @@ impl MetricsFinder { impl ExecutionPlanVisitor for MetricsFinder { type Error = std::convert::Infallible; fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { - if let Some(data_source_exec) = plan.as_any().downcast_ref::() { - if data_source_exec + if let Some(data_source_exec) = plan.as_any().downcast_ref::() + && data_source_exec .downcast_to_file_source::() .is_some() - { - self.metrics = data_source_exec.metrics(); - } + { + self.metrics = data_source_exec.metrics(); } // stop searching once we have found the metrics Ok(self.metrics.is_none()) diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs index a79d743cb253d..1fdc0ae6c7f60 100644 --- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs @@ -24,14 +24,15 @@ use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; +use datafusion_common::Result; use datafusion_common::cast::as_int64_array; use datafusion_common::config::ConfigOptions; -use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{self, cast}; -use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::aggregates::AggregateMode; use datafusion_physical_plan::aggregates::PhysicalGroupBy; @@ -39,7 +40,6 @@ use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::common; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::ExecutionPlan; /// Mock data using a MemorySourceConfig which has an exact count statistic fn mock_data() -> Result> { diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs index 9c76f6ab6f58b..2fdfece2a86e7 100644 --- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs +++ 
b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs @@ -29,18 +29,18 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; +use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::{col, lit}; -use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::displayable; use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::ExecutionPlan; /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected macro_rules! assert_optimized { @@ -191,7 +191,7 @@ fn aggregations_combined() -> datafusion_common::Result<()> { // should combine the Partial/Final AggregateExecs to the Single AggregateExec assert_optimized!( plan, - @ " + @ r" AggregateExec: mode=Single, gby=[], aggr=[COUNT(1)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet " diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs index 5b7d9ac8fbe99..7cedaf86cb52f 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs @@ -26,32 +26,33 @@ use crate::physical_optimizer::test_utils::{ sort_preserving_merge_exec, union_exec, }; -use arrow::array::{RecordBatch, UInt64Array, UInt8Array}; +use arrow::array::{RecordBatch, UInt8Array, UInt64Array}; use arrow::compute::SortOptions; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion::config::ConfigOptions; +use datafusion::datasource::MemTable; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{CsvSource, ParquetSource}; use datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::MemTable; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_common::ScalarValue; +use datafusion_common::config::CsvOptions; use datafusion_common::error::Result; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::ScalarValue; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_expr::{JoinType, Operator}; -use datafusion_physical_expr::expressions::{binary, lit, BinaryExpr, Column, Literal}; +use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal, binary, lit}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, OrderingRequirements, PhysicalSortExpr, }; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use 
datafusion_physical_optimizer::enforce_distribution::*; use datafusion_physical_optimizer::enforce_sorting::EnforceSorting; use datafusion_physical_optimizer::output_requirements::OutputRequirements; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; @@ -66,8 +67,8 @@ use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::{ - displayable, DisplayAs, DisplayFormatType, ExecutionPlanProperties, PlanProperties, - Statistics, + DisplayAs, DisplayFormatType, ExecutionPlanProperties, PlanProperties, Statistics, + displayable, }; use insta::Settings; @@ -229,8 +230,7 @@ fn parquet_exec_multiple_sorted( ) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema())), ) .with_file_groups(vec![ FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), @@ -247,14 +247,19 @@ fn csv_exec() -> Arc { } fn csv_exec_with_sort(output_ordering: Vec) -> Arc { - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_output_ordering(output_ordering) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering) + .build(); DataSourceExec::from_data_source(config) } @@ -265,17 +270,22 @@ fn csv_exec_multiple() -> Arc { // Created a sorted parquet exec with multiple files fn csv_exec_multiple_sorted(output_ordering: Vec) -> Arc { - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file_groups(vec![ - FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), - FileGroup::new(vec![PartitionedFile::new("y".to_string(), 100)]), - ]) - .with_output_ordering(output_ordering) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file_groups(vec![ + FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), + FileGroup::new(vec![PartitionedFile::new("y".to_string(), 100)]), + ]) + .with_output_ordering(output_ordering) + .build(); DataSourceExec::from_data_source(config) } @@ -618,16 +628,13 @@ fn multi_hash_joins() -> Result<()> { assert_plan!(plan_distrib, @r" HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, c@2)] HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), 
input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); }, // Should include 4 RepartitionExecs @@ -636,16 +643,13 @@ fn multi_hash_joins() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, c@2)] RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); }, }; @@ -690,16 +694,13 @@ fn multi_hash_joins() -> Result<()> { assert_plan!(plan_distrib, @r" HashJoinExec: mode=Partitioned, join_type=..., on=[(b1@1, c@2)] HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 DataSourceExec: file_groups={1 group: 
[[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } @@ -710,16 +711,13 @@ fn multi_hash_joins() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=..., on=[(b1@6, c@2)] RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10 HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); }, @@ -780,15 +778,12 @@ fn multi_joins_after_alias() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, c@2)] ProjectionExec: expr=[a@0 as a1, a@0 as a2] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); @@ -811,15 +806,12 @@ fn multi_joins_after_alias() -> Result<()> 
{ HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a2@1, c@2)] ProjectionExec: expr=[a@0 as a1, a@0 as a2] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); @@ -869,15 +861,12 @@ fn multi_joins_after_multi_alias() -> Result<()> { ProjectionExec: expr=[c1@0 as a] ProjectionExec: expr=[c@2 as c1] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); @@ -1098,21 +1087,17 @@ fn multi_hash_join_key_ordering() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=Inner, on=[(B@2, b1@6), (C@3, c@2), (AA@1, a1@5)] ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), 
input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=1 ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(filter_top_join, &SORT_DISTRIB_DISTRIB); @@ -1236,25 +1221,21 @@ fn reorder_join_keys_to_left_input() -> Result<()> { assert_eq!(captured_join_type, join_type.to_string()); insta::allow_duplicates! {insta::assert_snapshot!(modified_plan, @r" -HashJoinExec: mode=Partitioned, join_type=..., on=[(AA@1, a1@5), (B@2, b1@6), (C@3, c@2)] - ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1), (c@2, c1@2)] - RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -");} + HashJoinExec: mode=Partitioned, join_type=..., on=[(AA@1, a1@5), (B@2, b1@6), (C@3, c@2)] + ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1), (c@2, c1@2)] + RepartitionExec: 
partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + ");} } Ok(()) @@ -1368,25 +1349,21 @@ fn reorder_join_keys_to_right_input() -> Result<()> { let (_, plan_str) = hide_first(reordered.as_ref(), r"join_type=(\w+)", "join_type=..."); insta::allow_duplicates! {insta::assert_snapshot!(plan_str, @r" -HashJoinExec: mode=Partitioned, join_type=..., on=[(C@3, c@2), (B@2, b1@6), (AA@1, a1@5)] - ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1)] - RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -");} + HashJoinExec: mode=Partitioned, join_type=..., on=[(C@3, c@2), (B@2, b1@6), (AA@1, a1@5)] + ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1)] + RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=1 + ProjectionExec: 
expr=[a@0 as a1, b@1 as b1, c@2 as c1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ ");}
}
Ok(())
@@ -1447,52 +1424,46 @@ fn multi_smj_joins() -> Result<()> {
// Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs
JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => {
assert_plan!(plan_distrib, @r"
-SortMergeJoin: join_type=..., on=[(a@0, c@2)]
- SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
- SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
-");
+ SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
+ SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ ");
}
// Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs
- // Since ordering of the left child is not preserved after SortMergeJoin
+ // Since ordering of the left child is not preserved after SortMergeJoinExec
// when mode is Right, RightSemi, RightAnti, Full
- // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases
+ // - We need to add one additional SortExec after SortMergeJoinExec in contrast to the test cases
// when mode is Inner, Left, LeftSemi, LeftAnti
// Similarly, since partitioning of the left side is not preserved
// when mode is Right, RightSemi, RightAnti, Full
- // - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test
+ // - We need to add one additional Hash Repartition after SortMergeJoinExec in contrast to the test
// cases when mode is Inner, Left, LeftSemi, LeftAnti
_ => {
assert_plan!(plan_distrib, @r"
-SortMergeJoin: join_type=..., on=[(a@0, c@2)]
- SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10
- SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
- SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
-");
+ SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10
+ SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ ");
}
}
@@ -1503,55 +1474,48 @@ SortMergeJoin: join_type=..., on=[(a@0, c@2)]
JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => {
// TODO(wiedld): show different test result if enforce distribution first.
assert_plan!(plan_sort, @r"
-SortMergeJoin: join_type=..., on=[(a@0, c@2)]
- SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
- ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
-");
+ SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
+ SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ ");
}
// Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs
- // Since ordering of the left child is not preserved after SortMergeJoin
+ // Since ordering of the left child is not preserved after SortMergeJoinExec
// when mode is Right, RightSemi, RightAnti, Full
- // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases
+ // - We need to add one additional SortExec after SortMergeJoinExec in contrast to the test cases
// when mode is Inner, Left, LeftSemi, LeftAnti
// Similarly, since partitioning of the left side is not preserved
// when mode is Right, RightSemi, RightAnti, Full
// - We need to add one additional Hash Repartition and Roundrobin repartition after
- // SortMergeJoin in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti
+ // SortMergeJoinExec in contrast to the test cases when mode is Inner, Left, LeftSemi, LeftAnti
_ => {
// TODO(wiedld): show different test result if enforce distribution first.
assert_plan!(plan_sort, @r"
-SortMergeJoin: join_type=..., on=[(a@0, c@2)]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
- CoalescePartitionsExec
- SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
- ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
-");
+ SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ CoalescePartitionsExec
+ SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1, maintains_sort_order=true
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ ");
}
}
@@ -1572,45 +1536,39 @@ SortMergeJoin: join_type=..., on=[(a@0, c@2)]
JoinType::Inner | JoinType::Right => {
// TODO(wiedld): show different test result if enforce sorting first.
assert_plan!(plan_distrib, @r"
-SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
- SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
- SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
- SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
- RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10
- RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
-");
+ SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)]
+ SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ ");
}
// Should include 7 RepartitionExecs (4 hash, 3 round-robin) and 4 SortExecs
JoinType::Left | JoinType::Full => {
// TODO(wiedld): show different test result if enforce sorting first.
assert_plan!(plan_distrib, @r" -SortMergeJoin: join_type=..., on=[(b1@6, c@2)] - SortExec: expr=[b1@6 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10 - SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - SortExec: expr=[c@2 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)] + SortExec: expr=[b1@6 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10 + SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[c@2 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); } // this match arm cannot be reached _ => unreachable!() @@ -1623,47 +1581,40 @@ SortMergeJoin: join_type=..., on=[(b1@6, c@2)] JoinType::Inner | JoinType::Right => { // TODO(wiedld): show different test result if enforce distribution first. 
assert_plan!(plan_sort, @r" -SortMergeJoin: join_type=..., on=[(b1@6, c@2)] - SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)] + SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); } // Should include 8 RepartitionExecs (4 of them preserves order) and 4 SortExecs JoinType::Left | JoinType::Full => { // TODO(wiedld): show different test result if enforce distribution first. 
assert_plan!(plan_sort, @r" -SortMergeJoin: join_type=..., on=[(b1@6, c@2)] - RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@6 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@6 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)] + RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[b1@6 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); } // this match arm cannot be reached _ => unreachable!() @@ -1731,50 +1682,48 @@ fn smj_join_key_ordering() -> Result<()> { // Only two RepartitionExecs added let plan_distrib = test_config.to_plan(join.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] - SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true] - ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] - ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] - AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] - RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[true] - 
ProjectionExec: expr=[a@1 as a2, b@0 as b2] - AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] - RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortMergeJoinExec: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] + SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] + ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] + AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] + RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a@1 as a2, b@0 as b2] + AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] + RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Test: result IS DIFFERENT, if EnforceSorting is run first: let plan_sort = test_config.to_plan(join, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] - RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC, a3@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] - ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] - AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] - RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] + SortMergeJoinExec: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] + RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] + ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] + AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] + RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@1 as a2, b@0 as b2] + AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] + RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 + AggregateExec: mode=Partial, 
gby=[b@1 as b, a@0 as a], aggr=[] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC, a2@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - ProjectionExec: expr=[a@1 as a2, b@0 as b2] - AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] - RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + "); Ok(()) } @@ -1807,10 +1756,10 @@ fn merge_does_not_need_sort() -> Result<()> { let plan_distrib = test_config.to_plan(exec.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [a@0 ASC] - CoalesceBatchesExec: target_batch_size=4096 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortPreservingMergeExec: [a@0 ASC] + CoalesceBatchesExec: target_batch_size=4096 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); // Test: result IS DIFFERENT, if EnforceSorting is run first: // @@ -1821,11 +1770,11 @@ SortPreservingMergeExec: [a@0 ASC] let plan_sort = test_config.to_plan(exec, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - CoalesceBatchesExec: target_batch_size=4096 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + CoalesceBatchesExec: target_batch_size=4096 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); Ok(()) } @@ -2002,11 +1951,11 @@ fn repartition_sorted_limit() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -GlobalLimitExec: skip=0, fetch=100 - LocalLimitExec: fetch=100 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // data is sorted so can't repartition here let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -2031,12 +1980,12 @@ fn repartition_sorted_limit_with_filter() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortRequiredExec: [c@2 ASC] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortRequiredExec: 
[c@2 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // We can use repartition here, ordering requirement by SortRequiredExec // is still satisfied. let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); @@ -2057,19 +2006,19 @@ fn repartition_ignores_limit() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - GlobalLimitExec: skip=0, fetch=100 - CoalescePartitionsExec - LocalLimitExec: fetch=100 - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - GlobalLimitExec: skip=0, fetch=100 - LocalLimitExec: fetch=100 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // repartition should happen prior to the filter to maximize parallelism // Expect no repartition to happen for local limit (DataSourceExec) @@ -2087,13 +2036,13 @@ fn repartition_ignores_union() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -UnionExec - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Expect no repartition of DataSourceExec let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -2116,9 +2065,9 @@ fn repartition_through_sort_preserving_merge() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - 
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -2144,9 +2093,9 @@ fn repartition_ignores_sort_preserving_merge() -> Result<()> { // Test: run EnforceDistribution, then EnforceSort assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [c@2 ASC] - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortPreservingMergeExec: [c@2 ASC] + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); // should not sort (as the data was already sorted) // should not repartition, since increased parallelism is not beneficial for SortPReservingMerge @@ -2154,10 +2103,10 @@ SortPreservingMergeExec: [c@2 ASC] let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); Ok(()) } @@ -2182,11 +2131,11 @@ fn repartition_ignores_sort_preserving_merge_with_union() -> Result<()> { // Test: run EnforceDistribution, then EnforceSort. assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [c@2 ASC] - UnionExec - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortPreservingMergeExec: [c@2 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); // // should not repartition / sort (as the data was already sorted) @@ -2194,12 +2143,12 @@ SortPreservingMergeExec: [c@2 ASC] let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - UnionExec - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); Ok(()) } @@ -2226,11 +2175,11 @@ fn repartition_does_not_destroy_sort() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortRequiredExec: [d@3 ASC] - FilterExec: c@2 = 0 - RepartitionExec: 
partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet -"); + SortRequiredExec: [d@3 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet + "); // during repartitioning ordering is preserved let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -2266,13 +2215,13 @@ fn repartition_does_not_destroy_sort_more_complex() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -UnionExec - SortRequiredExec: [c@2 ASC] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + UnionExec + SortRequiredExec: [c@2 ASC] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // union input 1: no repartitioning // union input 2: should repartition // @@ -2309,23 +2258,23 @@ fn repartition_transitively_with_projection() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [sum@0 ASC] - SortExec: expr=[sum@0 ASC], preserve_partitioning=[true] - ProjectionExec: expr=[a@0 + b@1 as sum] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortPreservingMergeExec: [sum@0 ASC] + SortExec: expr=[sum@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a@0 + b@1 as sum] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Test: result IS DIFFERENT, if EnforceSorting is run first: let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[sum@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - ProjectionExec: expr=[a@0 + b@1 as sum] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[sum@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@0 + b@1 as sum] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Since this projection is not trivial, increasing parallelism is beneficial Ok(()) @@ -2357,10 +2306,10 @@ fn repartition_ignores_transitively_with_projection() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortRequiredExec: [c@2 ASC] - ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] - DataSourceExec: 
file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortRequiredExec: [c@2 ASC] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); // Since this projection is trivial, increasing parallelism is not beneficial let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); @@ -2394,10 +2343,10 @@ fn repartition_transitively_past_sort_with_projection() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Since this projection is trivial, increasing parallelism is not beneficial let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -2419,12 +2368,12 @@ fn repartition_transitively_past_sort_with_filter() -> Result<()> { let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [a@0 ASC] - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Expect repartition on the input to the sort (as it can benefit from additional parallelism) @@ -2432,12 +2381,12 @@ SortPreservingMergeExec: [a@0 ASC] let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Expect repartition on the input of the filter (as it can benefit from additional parallelism) Ok(()) @@ -2468,13 +2417,13 @@ fn repartition_transitively_past_sort_with_projection_and_filter() -> Result<()> let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [a@0 ASC] - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortPreservingMergeExec: [a@0 ASC] + 
SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Expect repartition on the input to the sort (as it can benefit from additional parallelism) // repartition is lowest down @@ -2483,13 +2432,13 @@ SortPreservingMergeExec: [a@0 ASC] let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); Ok(()) } @@ -2509,11 +2458,11 @@ fn parallelization_single_partition() -> Result<()> { test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_parquet_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=parquet -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=parquet + "); let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_parquet_distrib, plan_parquet_sort); @@ -2521,11 +2470,11 @@ AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_csv_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_csv_distrib, plan_csv_sort); @@ -2557,10 +2506,10 @@ fn parallelization_multiple_files() -> Result<()> { test_config_concurrency_3.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_3_distrib, @r" -SortRequiredExec: [a@0 ASC] - FilterExec: c@2 = 0 - DataSourceExec: file_groups={3 groups: [[x:0..50], [y:0..100], [x:50..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortRequiredExec: 
[a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={3 groups: [[x:0..50], [y:0..100], [x:50..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); let plan_3_sort = test_config_concurrency_3.to_plan(plan.clone(), &SORT_DISTRIB_DISTRIB); assert_plan!(plan_3_distrib, plan_3_sort); @@ -2570,10 +2519,10 @@ SortRequiredExec: [a@0 ASC] test_config_concurrency_8.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_8_distrib, @r" -SortRequiredExec: [a@0 ASC] - FilterExec: c@2 = 0 - DataSourceExec: file_groups={8 groups: [[x:0..25], [y:0..25], [x:25..50], [y:25..50], [x:50..75], [y:50..75], [x:75..100], [y:75..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={8 groups: [[x:0..25], [y:0..25], [x:25..50], [y:25..50], [x:50..75], [y:50..75], [x:75..100], [y:75..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); let plan_8_sort = test_config_concurrency_8.to_plan(plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_8_distrib, plan_8_sort); @@ -2597,11 +2546,15 @@ fn parallelization_compressed_csv() -> Result<()> { for compression_type in compression_types { let plan = aggregate_exec_with_alias( DataSourceExec::from_data_source( - FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_file_compression_type(compression_type) .build(), @@ -2617,21 +2570,21 @@ fn parallelization_compressed_csv() -> Result<()> { // Compressed files cannot be partitioned assert_plan!(plan_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); } else { // Uncompressed files can be partitioned assert_plan!(plan_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); } let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); @@ -2656,11 +2609,11 @@ fn 
parallelization_two_partitions() -> Result<()> { test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_parquet_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=parquet -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Plan already has two partitions let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_parquet_distrib, plan_parquet_sort); @@ -2668,11 +2621,11 @@ AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] // Test: with csv let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_csv_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); // Plan already has two partitions let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_csv_distrib, plan_csv_sort); @@ -2696,11 +2649,11 @@ fn parallelization_two_partitions_into_four() -> Result<()> { // Multiple source files split across partitions assert_plan!(plan_parquet_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=parquet -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Multiple source files split across partitions let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_parquet_distrib, plan_parquet_sort); @@ -2709,11 +2662,11 @@ AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); // Multiple source files split across partitions assert_plan!(plan_csv_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=csv, 
has_header=false -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); // Multiple source files split across partitions let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_csv_distrib, plan_csv_sort); @@ -2738,11 +2691,11 @@ fn parallelization_sorted_limit() -> Result<()> { let plan_parquet_distrib = test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_parquet_distrib, @r" -GlobalLimitExec: skip=0, fetch=100 - LocalLimitExec: fetch=100 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // data is sorted so can't repartition here // Doesn't parallelize for SortExec without preserve_partitioning let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); @@ -2752,11 +2705,11 @@ GlobalLimitExec: skip=0, fetch=100 let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_csv_distrib, @r" -GlobalLimitExec: skip=0, fetch=100 - LocalLimitExec: fetch=100 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); // data is sorted so can't repartition here // Doesn't parallelize for SortExec without preserve_partitioning let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); @@ -2787,14 +2740,14 @@ fn parallelization_limit_with_filter() -> Result<()> { // SortExec doesn't benefit from input partitioning assert_plan!(plan_parquet_distrib, @r" -GlobalLimitExec: skip=0, fetch=100 - CoalescePartitionsExec - LocalLimitExec: fetch=100 - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_parquet_distrib, plan_parquet_sort); @@ -2805,14 +2758,14 @@ GlobalLimitExec: skip=0, fetch=100 // SortExec doesn't benefit from input partitioning assert_plan!(plan_csv_distrib, @r" -GlobalLimitExec: skip=0, fetch=100 - CoalescePartitionsExec - LocalLimitExec: fetch=100 - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), 
input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_csv_distrib, plan_csv_sort); @@ -2891,13 +2844,13 @@ fn parallelization_union_inputs() -> Result<()> { test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_parquet_distrib, @r" -UnionExec - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // Union doesn't benefit from input partitioning - no parallelism let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_parquet_distrib, plan_parquet_sort); @@ -2906,13 +2859,13 @@ UnionExec let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_csv_distrib, @r" -UnionExec - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false -"); + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); // Union doesn't benefit from input partitioning - no parallelism let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_csv_distrib, plan_csv_sort); @@ -3118,9 +3071,9 @@ fn 
parallelization_ignores_transitively_with_projection_parquet() -> Result<()> // data should not be repartitioned / resorted assert_plan!(plan_parquet_distrib, @r" -ProjectionExec: expr=[a@0 as a2, c@2 as c2] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_parquet_distrib, plan_parquet_sort); @@ -3153,18 +3106,18 @@ fn parallelization_ignores_transitively_with_projection_csv() -> Result<()> { let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv); assert_plan!(plan_csv, @r" -SortPreservingMergeExec: [c2@1 ASC] - ProjectionExec: expr=[a@0 as a2, c@2 as c2] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false -"); + SortPreservingMergeExec: [c2@1 ASC] + ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + "); let test_config = TestConfig::default(); let plan_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -ProjectionExec: expr=[a@0 as a2, c@2 as c2] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false -"); + ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + "); // Expected Outcome: // data should not be repartitioned / resorted let plan_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); @@ -3180,21 +3133,21 @@ fn remove_redundant_roundrobins() -> Result<()> { let physical_plan = repartition_exec(filter_exec(repartition)); assert_plan!(physical_plan, @r" -RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 - FilterExec: c@2 = 0 RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let test_config = TestConfig::default(); let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3222,11 +3175,11 @@ fn remove_unnecessary_spm_after_filter() -> Result<()> { // This is still satisfied since, after filter that column is constant. 
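Note on the snapshot re-indentation running through the hunks above: the inline snapshots passed to assert_plan! (a test-local wrapper) gain leading whitespace only. The sketch below is a minimal standalone test, assuming the insta crate's usual dedenting of @r"..." inline snapshots, showing why that indentation is cosmetic and does not change what is asserted:

// Hypothetical test; needs only the `insta` dev-dependency.
#[test]
fn inline_snapshot_indentation_is_cosmetic() {
    let plan_text = "SortExec: expr=[a@0 ASC]\n  DataSourceExec: file_groups={1 group: [[x]]}";
    // insta strips the leading newline and the common indentation of an
    // inline snapshot, so the indented literal below still matches `plan_text`.
    insta::assert_snapshot!(plan_text, @r"
        SortExec: expr=[a@0 ASC]
          DataSourceExec: file_groups={1 group: [[x]]}
    ");
}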
assert_plan!(plan_distrib, @r" -CoalescePartitionsExec - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c@2 ASC - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c@2 ASC + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3251,11 +3204,11 @@ fn preserve_ordering_through_repartition() -> Result<()> { let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [d@3 ASC] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=d@3 ASC - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet -"); + SortPreservingMergeExec: [d@3 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=d@3 ASC + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3279,23 +3232,23 @@ fn do_not_preserve_ordering_through_repartition() -> Result<()> { // Test: run EnforceDistribution, then EnforceSort. 
assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [a@0 ASC] - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); // Test: result IS DIFFERENT, if EnforceSorting is run first: let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); Ok(()) } @@ -3314,11 +3267,11 @@ fn no_need_for_sort_after_filter() -> Result<()> { let test_config = TestConfig::default(); let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -CoalescePartitionsExec - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); // After CoalescePartitionsExec c is still constant. Hence c@2 ASC ordering is already satisfied. @@ -3350,24 +3303,24 @@ fn do_not_preserve_ordering_through_repartition2() -> Result<()> { // Test: run EnforceDistribution, then EnforceSort. 
assert_plan!(plan_distrib, @r" -SortPreservingMergeExec: [a@0 ASC] - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); // Test: result IS DIFFERENT, if EnforceSorting is run first: let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" -SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); Ok(()) } @@ -3387,10 +3340,10 @@ fn do_not_preserve_ordering_through_repartition3() -> Result<()> { let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3410,10 +3363,10 @@ fn do_not_put_sort_when_input_is_invalid() -> Result<()> { // Ordering requirement of sort required exec is NOT satisfied // by existing ordering at the source. assert_plan!(physical_plan, @r" -SortRequiredExec: [a@0 ASC] - FilterExec: c@2 = 0 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let mut config = ConfigOptions::new(); config.execution.target_partitions = 10; @@ -3423,11 +3376,11 @@ SortRequiredExec: [a@0 ASC] // Since at the start of the rule ordering requirement is not satisfied // EnforceDistribution rule doesn't satisfy this requirement either. 
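For context, the `dist_plan` assertions in this area apply EnforceDistribution directly with an explicit target_partitions, as the ConfigOptions snippet above shows. A minimal sketch of that pattern using the public rule (the plan passed in stands in for the test-utility builders, which are not reproduced here):

use std::sync::Arc;
use datafusion_common::Result;
use datafusion_common::config::ConfigOptions;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_optimizer::enforce_distribution::EnforceDistribution;
use datafusion_physical_plan::ExecutionPlan;

// Sketch: run only the distribution rule, mirroring the `dist_plan` checks.
fn enforce_distribution_only(
    plan: Arc<dyn ExecutionPlan>,
) -> Result<Arc<dyn ExecutionPlan>> {
    let mut config = ConfigOptions::new();
    config.execution.target_partitions = 10;
    // The rule inserts RepartitionExec / CoalescePartitionsExec where the
    // plan's distribution requirements are not already satisfied.
    EnforceDistribution::new().optimize(plan, &config)
}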
assert_plan!(dist_plan, @r" -SortRequiredExec: [a@0 ASC] - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); Ok(()) } @@ -3446,10 +3399,10 @@ fn put_sort_when_input_is_valid() -> Result<()> { // Ordering requirement of sort required exec is satisfied // by existing ordering at the source. assert_plan!(physical_plan, @r" -SortRequiredExec: [a@0 ASC] - FilterExec: c@2 = 0 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); let mut config = ConfigOptions::new(); config.execution.target_partitions = 10; @@ -3459,10 +3412,10 @@ SortRequiredExec: [a@0 ASC] // Since at the start of the rule ordering requirement is satisfied // EnforceDistribution rule satisfy this requirement also. assert_plan!(dist_plan, @r" -SortRequiredExec: [a@0 ASC] - FilterExec: c@2 = 0 - DataSourceExec: file_groups={10 groups: [[x:0..20], [y:0..20], [x:20..40], [y:20..40], [x:40..60], [y:40..60], [x:60..80], [y:60..80], [x:80..100], [y:80..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet -"); + SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={10 groups: [[x:0..20], [y:0..20], [x:20..40], [y:20..40], [x:40..60], [y:40..60], [x:60..80], [y:60..80], [x:80..100], [y:80..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); Ok(()) } @@ -3486,10 +3439,10 @@ fn do_not_add_unnecessary_hash() -> Result<()> { let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3516,14 +3469,14 @@ fn do_not_add_unnecessary_hash2() -> Result<()> { let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet -"); + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + 
RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); // Since hash requirements of this operator is satisfied. There shouldn't be // a hash repartition here let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); @@ -3537,17 +3490,15 @@ fn optimize_away_unnecessary_repartition() -> Result<()> { let physical_plan = coalesce_partitions_exec(repartition_exec(parquet_exec())); assert_plan!(physical_plan, @r" -CoalescePartitionsExec - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + CoalescePartitionsExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let test_config = TestConfig::default(); let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, - @r" -DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet"); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3561,23 +3512,23 @@ fn optimize_away_unnecessary_repartition2() -> Result<()> { ))); assert_plan!(physical_plan, @r" -FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - CoalescePartitionsExec - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let test_config = TestConfig::default(); let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); assert_plan!(plan_distrib, @r" -FilterExec: c@2 = 0 - FilterExec: c@2 = 0 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + FilterExec: c@2 = 0 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_distrib, plan_sort); @@ -3601,29 +3552,29 @@ async fn test_distribute_sort_parquet() -> Result<()> { // prior to optimization, this is the starting plan assert_plan!(physical_plan, @r" -SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); // what the enforce distribution run does. 
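test_distribute_sort_parquet, which starts above, exercises the two optimizer passes in sequence: in the plans shown below, the distribution pass splits the single Parquet file into ten 8,192,000-byte ranges, and the sorting pass then parallelizes the sort under a SortPreservingMergeExec. A minimal sketch of chaining the two public rules (TestConfig and Run are test-local helpers and are assumed away here):

use std::sync::Arc;
use datafusion_common::Result;
use datafusion_common::config::ConfigOptions;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_optimizer::enforce_distribution::EnforceDistribution;
use datafusion_physical_optimizer::enforce_sorting::EnforceSorting;
use datafusion_physical_plan::ExecutionPlan;

// Sketch: mirror `&[Run::Distribution, Run::Sorting]` by applying the two
// rules back to back with the same ConfigOptions.
fn distribution_then_sorting(
    plan: Arc<dyn ExecutionPlan>,
    config: &ConfigOptions,
) -> Result<Arc<dyn ExecutionPlan>> {
    let plan = EnforceDistribution::new().optimize(plan, config)?;
    EnforceSorting::new().optimize(plan, config)
}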
let plan_distribution = test_config.to_plan(physical_plan.clone(), &[Run::Distribution]); assert_plan!(plan_distribution, @r" -SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet + "); // what the sort parallelization (in enforce sorting), does after the enforce distribution changes let plan_both = test_config.to_plan(physical_plan, &[Run::Distribution, Run::Sorting]); assert_plan!(plan_both, @r" -SortPreservingMergeExec: [c@2 ASC] - SortExec: expr=[c@2 ASC], preserve_partitioning=[true] - DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet -"); + SortPreservingMergeExec: [c@2 ASC] + SortExec: expr=[c@2 ASC], preserve_partitioning=[true] + DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet + "); Ok(()) } @@ -3650,10 +3601,10 @@ async fn test_distribute_sort_memtable() -> Result<()> { // this is the final, optimized plan assert_plan!(physical_plan, @r" -SortPreservingMergeExec: [id@0 ASC NULLS LAST] - SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] - DataSourceExec: partitions=3, partition_sizes=[34, 33, 33] -"); + SortPreservingMergeExec: [id@0 ASC NULLS LAST] + SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] + DataSourceExec: partitions=3, partition_sizes=[34, 33, 33] + "); Ok(()) } diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index e3a0eb7e1aa6f..47e3adb455117 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -19,19 +19,20 @@ use std::sync::Arc; use crate::memory_limit::DummyStreamPartition; use crate::physical_optimizer::test_utils::{ - aggregate_exec, bounded_window_exec, bounded_window_exec_with_partition, - check_integrity, coalesce_batches_exec, coalesce_partitions_exec, create_test_schema, - create_test_schema2, create_test_schema3, filter_exec, global_limit_exec, - hash_join_exec, local_limit_exec, memory_exec, parquet_exec, parquet_exec_with_sort, - projection_exec, repartition_exec, sort_exec, sort_exec_with_fetch, sort_expr, - sort_expr_options, sort_merge_join_exec, sort_preserving_merge_exec, + RequirementsTestExec, aggregate_exec, bounded_window_exec, + 
bounded_window_exec_with_partition, check_integrity, coalesce_batches_exec, + coalesce_partitions_exec, create_test_schema, create_test_schema2, + create_test_schema3, filter_exec, global_limit_exec, hash_join_exec, + local_limit_exec, memory_exec, parquet_exec, parquet_exec_with_sort, projection_exec, + repartition_exec, sort_exec, sort_exec_with_fetch, sort_expr, sort_expr_options, + sort_merge_join_exec, sort_preserving_merge_exec, sort_preserving_merge_exec_with_fetch, spr_repartition_exec, stream_exec_ordered, - union_exec, RequirementsTestExec, + union_exec, }; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, SchemaRef}; -use datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::tree_node::{TreeNode, TransformedResult}; use datafusion_common::{Result, TableReference}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -65,17 +66,22 @@ use datafusion_execution::TaskContext; use datafusion_catalog::streaming::StreamingTable; use futures::StreamExt; -use insta::{assert_snapshot, Settings}; +use insta::{Settings, assert_snapshot}; /// Create a sorted Csv exec fn csv_exec_sorted( schema: &SchemaRef, sort_exprs: impl IntoIterator, ) -> Arc { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; let mut builder = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - Arc::new(CsvSource::new(false, 0, 0)), + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)), ) .with_file(PartitionedFile::new("x".to_string(), 100)); if let Some(ordering) = LexOrdering::new(sort_exprs) { @@ -361,8 +367,8 @@ async fn test_union_inputs_different_sorted2() -> Result<()> { #[tokio::test] // Test with `repartition_sorts` enabled to preserve pre-sorted partitions and avoid resorting -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, @r" @@ -387,8 +393,8 @@ async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_reparti #[tokio::test] // Test with `repartition_sorts` disabled, causing a full resort of the data -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, @r" @@ -659,21 +665,13 @@ async fn test_union_inputs_different_sorted7() -> Result<()> { // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. 
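The csv_exec_sorted helper above is ported to the new CsvSource constructor that takes the schema directly, with header, delimiter, and quote supplied through CsvOptions instead of positional arguments. A sketch of that construction pattern outside the helper (the CsvSource import path is an assumption; the rest mirrors the hunk above):

use std::sync::Arc;
use arrow::datatypes::SchemaRef;
use datafusion_common::config::CsvOptions;
use datafusion_datasource::PartitionedFile;
use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder};
use datafusion_datasource_csv::source::CsvSource; // assumed import path
use datafusion_execution::object_store::ObjectStoreUrl;

// Sketch: build a single-file CSV scan config the way `csv_exec_sorted` now
// does, passing the schema to CsvSource and the parsing knobs via CsvOptions.
fn csv_scan_config(schema: SchemaRef) -> FileScanConfig {
    let options = CsvOptions {
        has_header: Some(false),
        delimiter: b',',
        quote: b'"',
        ..Default::default()
    };
    FileScanConfigBuilder::new(
        ObjectStoreUrl::parse("test:///").unwrap(),
        Arc::new(CsvSource::new(schema).with_csv_options(options)),
    )
    .with_file(PartitionedFile::new("x".to_string(), 100))
    .build()
}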
let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r" - Input Plan: + Input / Optimized Plan: SortPreservingMergeExec: [nullable_col@0 ASC] UnionExec SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet - - Optimized Plan: - SortPreservingMergeExec: [nullable_col@0 ASC] - UnionExec - SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet - SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "); // Union preserves the inputs ordering, and we should not change any of the SortExecs under UnionExec @@ -773,8 +771,8 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { } #[tokio::test] -async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( -) -> Result<()> { +async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns() +-> Result<()> { let schema = create_test_schema()?; let source = parquet_exec(schema.clone()); let ordering = [sort_expr_options( @@ -1072,8 +1070,8 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { } #[tokio::test] -async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_requirement( -) -> Result<()> { +async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_requirement() +-> Result<()> { let schema = create_test_schema()?; let source = parquet_exec(schema.clone()); let ordering = [sort_expr_options( @@ -1342,12 +1340,12 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false] @@ -1359,13 +1357,13 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: SortExec: expr=[nullable_col@0 ASC, 
non_nullable_col@1 ASC], preserve_partitioning=[false] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false] @@ -1432,12 +1430,12 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false] @@ -1449,12 +1447,12 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [col_a@0 ASC, col_b@1 ASC] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false] @@ -1466,13 +1464,13 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: SortExec: expr=[col_a@2 ASC, col_b@3 ASC], preserve_partitioning=[false] - SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false] @@ -1515,13 +1513,13 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC] - SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)] DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: SortExec: expr=[col_b@3 ASC, nullable_col@0 ASC], preserve_partitioning=[false] - SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false] @@ -1542,12 +1540,12 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC] - SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet Optimized Plan: - SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)] + SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false] @@ -1626,13 +1624,13 @@ async fn test_with_lost_ordering_unbounded() -> Result<()> { SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] Optimized Plan: SortPreservingMergeExec: [a@0 ASC] RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] "); @@ -1644,13 +1642,13 @@ async fn test_with_lost_ordering_unbounded() -> Result<()> { SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] Optimized Plan: SortPreservingMergeExec: [a@0 ASC] RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] "); @@ -1669,7 +1667,7 @@ async fn 
test_with_lost_ordering_bounded() -> Result<()> { SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false "); @@ -1681,14 +1679,14 @@ async fn test_with_lost_ordering_bounded() -> Result<()> { SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false Optimized Plan: SortPreservingMergeExec: [a@0 ASC] SortExec: expr=[a@0 ASC], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false "); @@ -1710,7 +1708,7 @@ async fn test_do_not_pushdown_through_spm() -> Result<()> { Input / Optimized Plan: SortExec: expr=[b@1 ASC], preserve_partitioning=[false] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false "); @@ -1739,13 +1737,13 @@ async fn test_pushdown_through_spm() -> Result<()> { Input Plan: SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false Optimized Plan: SortPreservingMergeExec: [a@0 ASC, b@1 ASC] SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false "); Ok(()) @@ -1769,7 +1767,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, 
maintains_sort_order=true SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false @@ -1964,7 +1962,7 @@ async fn test_remove_unnecessary_sort2() -> Result<()> { assert_snapshot!(test.run(), @r" Input Plan: RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] @@ -2011,7 +2009,7 @@ async fn test_remove_unnecessary_sort3() -> Result<()> { AggregateExec: mode=Final, gby=[], aggr=[] SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true SortPreservingMergeExec: [non_nullable_col@1 ASC] SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] @@ -2360,7 +2358,7 @@ async fn test_commutativity() -> Result<()> { assert_snapshot!(displayable(orig_plan.as_ref()).indent(true), @r#" SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: partitions=1, partition_sizes=[0] "#); diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs index ef233e222912c..de7611ff211a5 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs @@ -31,7 +31,7 @@ use datafusion_physical_expr::expressions::col; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::windows::{ - create_window_expr, BoundedWindowAggExec, WindowAggExec, + BoundedWindowAggExec, WindowAggExec, create_window_expr, }; use datafusion_physical_plan::{ExecutionPlan, InputOrderMode}; use insta::assert_snapshot; diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index de61149508904..f480de71d6285 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -18,7 +18,7 @@ use std::sync::{Arc, LazyLock}; use arrow::{ - array::record_batch, + array::{Float64Array, Int32Array, RecordBatch, StringArray, record_batch}, datatypes::{DataType, Field, Schema, SchemaRef}, util::pretty::pretty_format_batches, }; @@ -27,8 +27,8 @@ use datafusion::{ assert_batches_eq, logical_expr::Operator, physical_plan::{ - expressions::{BinaryExpr, Column, Literal}, PhysicalExpr, + expressions::{BinaryExpr, Column, 
Literal}, }, prelude::{ParquetReadOptions, SessionConfig, SessionContext}, scalar::ScalarValue, @@ -36,20 +36,25 @@ use datafusion::{ use datafusion_catalog::memory::DataSourceExec; use datafusion_common::config::ConfigOptions; use datafusion_datasource::{ - file_groups::FileGroup, file_scan_config::FileScanConfigBuilder, PartitionedFile, + PartitionedFile, file_groups::FileGroup, file_scan_config::FileScanConfigBuilder, }; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::ScalarUDF; use datafusion_functions::math::random::RandomFunc; -use datafusion_functions_aggregate::count::count_udaf; +use datafusion_functions_aggregate::{ + count::count_udaf, + min_max::{max_udaf, min_udaf}, +}; +use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr, expressions::col}; use datafusion_physical_expr::{ - aggregate::AggregateExprBuilder, Partitioning, ScalarFunctionExpr, + Partitioning, ScalarFunctionExpr, + aggregate::{AggregateExprBuilder, AggregateFunctionExpr}, }; -use datafusion_physical_expr::{expressions::col, LexOrdering, PhysicalSortExpr}; use datafusion_physical_optimizer::{ - filter_pushdown::FilterPushdown, PhysicalOptimizerRule, + PhysicalOptimizerRule, filter_pushdown::FilterPushdown, }; use datafusion_physical_plan::{ + ExecutionPlan, aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}, coalesce_batches::CoalesceBatchesExec, coalesce_partitions::CoalescePartitionsExec, @@ -57,13 +62,13 @@ use datafusion_physical_plan::{ filter::FilterExec, repartition::RepartitionExec, sorts::sort::SortExec, - ExecutionPlan, }; use datafusion_physical_plan::union::UnionExec; use futures::StreamExt; -use object_store::{memory::InMemory, ObjectStore}; -use util::{format_plan_for_test, OptimizationTest, TestNode, TestScanBuilder}; +use object_store::{ObjectStore, memory::InMemory}; +use regex::Regex; +use util::{OptimizationTest, TestNode, TestScanBuilder, format_plan_for_test}; use crate::physical_optimizer::filter_pushdown::util::TestSource; @@ -177,12 +182,14 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8View, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8View, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8View, false), @@ -194,12 +201,14 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8View, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("d", Utf8, ["aa", "ab", "ac", "ad"]), + ("e", Utf8View, ["ba", "bb", "bc", "bd"]), + ("f", Float64, [1.0, 2.0, 3.0, 4.0]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("e", DataType::Utf8View, false), @@ -272,13 +281,14 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { stream.next().await.unwrap().unwrap(); // Test that filters are pushed down correctly to each side of the join + // NOTE: We dropped the CASE expression here 
because we now optimize that away if there's only 1 partition insta::assert_snapshot!( format_plan_for_test(&plan), @r" - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false], filter=[e@4 IS NULL OR e@4 < bb] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ] " ); } @@ -293,12 +303,14 @@ async fn test_static_filter_pushdown_through_hash_join() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8View, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8View, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8View, false), @@ -310,12 +322,14 @@ async fn test_static_filter_pushdown_through_hash_join() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8View, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("d", Utf8, ["aa", "ab", "ac", "ad"]), + ("e", Utf8View, ["ba", "bb", "bc", "bd"]), + ("f", Float64, [1.0, 2.0, 3.0, 4.0]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("e", DataType::Utf8View, false), @@ -556,15 +570,14 @@ fn test_pushdown_through_aggregates_on_grouping_columns() { FilterExec::try_new(col_lit_predicate("a", "foo", &schema()), coalesce).unwrap(), ); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; let group_by = PhysicalGroupBy::new_single(vec![ (col("a", &schema()).unwrap(), "a".to_string()), (col("b", &schema()).unwrap(), "b".to_string()), @@ -859,20 +872,17 @@ async fn test_topk_filter_passes_through_coalesce_partitions() { ]; // Create a source that supports all batches - let source = Arc::new(TestSource::new(true, batches)); - - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test://").unwrap(), - Arc::clone(&schema()), - source, - ) - .with_file_groups(vec![ - // Partition 0 - FileGroup::new(vec![PartitionedFile::new("test1.parquet", 123)]), - // Partition 1 - FileGroup::new(vec![PartitionedFile::new("test2.parquet", 123)]), - ]) - .build(); + let source = Arc::new(TestSource::new(schema(), true, batches)); + + let base_config = + 
FileScanConfigBuilder::new(ObjectStoreUrl::parse("test://").unwrap(), source) + .with_file_groups(vec![ + // Partition 0 + FileGroup::new(vec![PartitionedFile::new("test1.parquet", 123)]), + // Partition 1 + FileGroup::new(vec![PartitionedFile::new("test2.parquet", 123)]), + ]) + .build(); let scan = DataSourceExec::from_data_source(base_config); @@ -972,12 +982,14 @@ async fn test_hashjoin_dynamic_filter_pushdown() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -989,12 +1001,14 @@ async fn test_hashjoin_dynamic_filter_pushdown() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1077,7 +1091,7 @@ async fn test_hashjoin_dynamic_filter_pushdown() { @r" - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] " ); } @@ -1140,12 +1154,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { // +---------------+------------------------------------------------------------+ // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1157,12 +1173,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = 
vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1308,10 +1326,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= ab AND a@0 <= ab AND b@1 >= bb AND b@1 <= bb OR a@0 >= aa AND a@0 <= aa AND b@1 >= ba AND b@1 <= ba ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ CASE hash_repartition % 12 WHEN 2 THEN a@0 >= ab AND a@0 <= ab AND b@1 >= bb AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:ab,c1:bb}]) WHEN 4 THEN a@0 >= aa AND a@0 <= aa AND b@1 >= ba AND b@1 <= ba AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}]) ELSE false END ] " ); + // When hash collisions force all data into a single partition, we optimize away the CASE expression. + // This avoids calling create_hashes() for every row on the probe side, since hash % 1 == 0 always, + // meaning the WHEN 0 branch would always match. This optimization is also important for primary key + // joins or any scenario where all build-side data naturally lands in one partition. #[cfg(feature = "force_hash_collisions")] insta::assert_snapshot!( format!("{}", format_plan_for_test(&plan)), @@ -1325,7 +1347,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] " ); @@ -1356,12 +1378,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { use datafusion_common::JoinType; use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1373,12 +1397,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - 
("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1502,7 +1528,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a@0, b@1], 12), input_partitions=1 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ] " ); @@ -1535,10 +1561,9 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { // Create test data for three tables: t1, t2, t3 // t1: small table with limited values (will be build side of outer join) - let t1_batches = - vec![ - record_batch!(("a", Utf8, ["aa", "ab"]), ("x", Float64, [1.0, 2.0])).unwrap(), - ]; + let t1_batches = vec![ + record_batch!(("a", Utf8, ["aa", "ab"]), ("x", Float64, [1.0, 2.0])).unwrap(), + ]; let t1_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("x", DataType::Float64, false), @@ -1549,12 +1574,14 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { .build(); // t2: larger table (will be probe side of inner join, build side of outer join) - let t2_batches = vec![record_batch!( - ("b", Utf8, ["aa", "ab", "ac", "ad", "ae"]), - ("c", Utf8, ["ca", "cb", "cc", "cd", "ce"]), - ("y", Float64, [1.0, 2.0, 3.0, 4.0, 5.0]) - ) - .unwrap()]; + let t2_batches = vec![ + record_batch!( + ("b", Utf8, ["aa", "ab", "ac", "ad", "ae"]), + ("c", Utf8, ["ca", "cb", "cc", "cd", "ce"]), + ("y", Float64, [1.0, 2.0, 3.0, 4.0, 5.0]) + ) + .unwrap(), + ]; let t2_schema = Arc::new(Schema::new(vec![ Field::new("b", DataType::Utf8, false), Field::new("c", DataType::Utf8, false), @@ -1566,11 +1593,13 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { .build(); // t3: largest table (will be probe side of inner join) - let t3_batches = vec![record_batch!( - ("d", Utf8, ["ca", "cb", "cc", "cd", "ce", "cf", "cg", "ch"]), - ("z", Float64, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) - ) - .unwrap()]; + let t3_batches = vec![ + record_batch!( + ("d", Utf8, ["ca", "cb", "cc", "cd", "ce", "cf", "cg", "ch"]), + ("z", Float64, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) + ) + .unwrap(), + ]; let t3_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("z", DataType::Float64, false), @@ -1670,8 +1699,8 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)] - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)] - - DataSourceExec: file_groups={1 group: 
[[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab ] - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ] " ); } @@ -1682,12 +1711,14 @@ async fn test_hashjoin_parent_filter_pushdown() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1699,12 +1730,14 @@ async fn test_hashjoin_parent_filter_pushdown() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("d", Utf8, ["aa", "ab", "ac", "ad"]), + ("e", Utf8, ["ba", "bb", "bc", "bd"]), + ("f", Float64, [1.0, 2.0, 3.0, 4.0]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("e", DataType::Utf8, false), @@ -1827,7 +1860,7 @@ STORED AS PARQUET; assert!(explain.contains("output_rows=128")); // Read 1 row group assert!(explain.contains("t@0 < 1372708809")); // Dynamic filter was applied assert!( - explain.contains("pushdown_rows_matched=128, pushdown_rows_pruned=99872"), + explain.contains("pushdown_rows_matched=128, pushdown_rows_pruned=99.87 K"), "{explain}" ); // Pushdown pruned most rows @@ -1892,16 +1925,438 @@ fn col_lit_predicate( )) } +// ==== Aggregate Dynamic Filter tests ==== + +// ---- Test Utilities ---- +struct AggregateDynFilterCase<'a> { + schema: SchemaRef, + batches: Vec, + aggr_exprs: Vec, + expected_before: Option<&'a str>, + expected_after: Option<&'a str>, + scan_support: bool, +} + +async fn run_aggregate_dyn_filter_case(case: AggregateDynFilterCase<'_>) { + let AggregateDynFilterCase { + schema, + batches, + aggr_exprs, + expected_before, + expected_after, + scan_support, + } = case; + + let scan = TestScanBuilder::new(Arc::clone(&schema)) + .with_support(scan_support) + .with_batches(batches) + .build(); + + let aggr_exprs: Vec<_> = aggr_exprs + .into_iter() + .map(|expr| Arc::new(expr) as Arc) + .collect(); + let aggr_len = aggr_exprs.len(); + + let plan: Arc = Arc::new( + AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::new_single(vec![]), + aggr_exprs, + vec![None; aggr_len], + scan, + Arc::clone(&schema), + ) + .unwrap(), + ); + + let mut config = ConfigOptions::default(); + config.execution.parquet.pushdown_filters = true; + config.optimizer.enable_dynamic_filter_pushdown = true; + + let optimized = 
FilterPushdown::new_post_optimization() + .optimize(plan, &config) + .unwrap(); + + let before = format_plan_for_test(&optimized); + if let Some(expected) = expected_before { + assert!( + before.contains(expected), + "expected `{expected}` before execution, got: {before}" + ); + } else { + assert!( + !before.contains("DynamicFilter ["), + "dynamic filter unexpectedly present before execution: {before}" + ); + } + + let session_ctx = SessionContext::new(); + session_ctx.register_object_store( + ObjectStoreUrl::parse("test://").unwrap().as_ref(), + Arc::new(InMemory::new()), + ); + let task_ctx = session_ctx.state().task_ctx(); + let mut stream = optimized.execute(0, Arc::clone(&task_ctx)).unwrap(); + let _ = stream.next().await.transpose().unwrap(); + + let after = format_plan_for_test(&optimized); + if let Some(expected) = expected_after { + assert!( + after.contains(expected), + "expected `{expected}` after execution, got: {after}" + ); + } else { + assert!( + !after.contains("DynamicFilter ["), + "dynamic filter unexpectedly present after execution: {after}" + ); + } +} + +// ---- Test Cases ---- +// Cases covered below: +// 1. `min(a)` and `max(a)` baseline. +// 2. Unsupported expression input (`min(a+1)`). +// 3. Multiple supported columns (same column vs different columns). +// 4. Mixed supported + unsupported aggregates. +// 5. Entirely NULL input to surface current bound behavior. +// 6. End-to-end tests on parquet files + +/// `MIN(a)`: able to pushdown dynamic filter +#[tokio::test] +async fn test_aggregate_dynamic_filter_min_simple() { + // Single min(a) showcases the base case. + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; + + let min_expr = + AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("min_a") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![min_expr], + expected_before: Some("DynamicFilter [ empty ]"), + expected_after: Some("DynamicFilter [ a@0 < 1 ]"), + scan_support: true, + }) + .await; +} + +/// `MAX(a)`: able to pushdown dynamic filter +#[tokio::test] +async fn test_aggregate_dynamic_filter_max_simple() { + // Single max(a) mirrors the base case on the upper bound. 
+ let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; + + let max_expr = + AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("max_a") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![max_expr], + expected_before: Some("DynamicFilter [ empty ]"), + expected_after: Some("DynamicFilter [ a@0 > 8 ]"), + scan_support: true, + }) + .await; +} + +/// `MIN(a+1)`: Can't pushdown dynamic filter +#[tokio::test] +async fn test_aggregate_dynamic_filter_min_expression_not_supported() { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; + + let expr: Arc = Arc::new(BinaryExpr::new( + col("a", &schema).unwrap(), + Operator::Plus, + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + )); + let min_expr = AggregateExprBuilder::new(min_udaf(), vec![expr]) + .schema(Arc::clone(&schema)) + .alias("min_a_plus_one") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![min_expr], + expected_before: None, + expected_after: None, + scan_support: true, + }) + .await; +} + +/// `MIN(a), MAX(a)`: Pushdown dynamic filter like `(a<1) or (a>8)` +#[tokio::test] +async fn test_aggregate_dynamic_filter_min_max_same_column() { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; + + let min_expr = + AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("min_a") + .build() + .unwrap(); + let max_expr = + AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("max_a") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![min_expr, max_expr], + expected_before: Some("DynamicFilter [ empty ]"), + expected_after: Some("DynamicFilter [ a@0 < 1 OR a@0 > 8 ]"), + scan_support: true, + }) + .await; +} + +/// `MIN(a), MAX(b)`: Pushdown dynamic filter like `(a<1) or (b>9)` +#[tokio::test] +async fn test_aggregate_dynamic_filter_min_max_different_columns() { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + let batches = vec![ + record_batch!(("a", Int32, [5, 1, 3, 8]), ("b", Int32, [7, 2, 4, 9])).unwrap(), + ]; + + let min_expr = + AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("min_a") + .build() + .unwrap(); + let max_expr = + AggregateExprBuilder::new(max_udaf(), vec![col("b", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("max_b") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![min_expr, max_expr], + expected_before: Some("DynamicFilter [ empty ]"), + expected_after: Some("DynamicFilter [ a@0 < 1 OR b@1 > 9 ]"), + scan_support: true, + }) + .await; +} + +/// Mix of supported/unsupported aggregates retains only the valid ones. 
+/// `MIN(a), MAX(a), MAX(b), MIN(c+1)`: Pushdown dynamic filter like `(a<1) or (a>8) OR (b>12)` +#[tokio::test] +async fn test_aggregate_dynamic_filter_multiple_mixed_expressions() { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ])); + let batches = vec![ + record_batch!( + ("a", Int32, [5, 1, 3, 8]), + ("b", Int32, [10, 4, 6, 12]), + ("c", Int32, [100, 70, 90, 110]) + ) + .unwrap(), + ]; + + let min_a = AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("min_a") + .build() + .unwrap(); + let max_a = AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("max_a") + .build() + .unwrap(); + let max_b = AggregateExprBuilder::new(max_udaf(), vec![col("b", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("max_b") + .build() + .unwrap(); + let expr_c: Arc = Arc::new(BinaryExpr::new( + col("c", &schema).unwrap(), + Operator::Plus, + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + )); + let min_c_expr = AggregateExprBuilder::new(min_udaf(), vec![expr_c]) + .schema(Arc::clone(&schema)) + .alias("min_c_plus_one") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![min_a, max_a, max_b, min_c_expr], + expected_before: Some("DynamicFilter [ empty ]"), + expected_after: Some("DynamicFilter [ a@0 < 1 OR a@0 > 8 OR b@1 > 12 ]"), + scan_support: true, + }) + .await; +} + +/// Don't tighten the dynamic filter if all inputs are null +#[tokio::test] +async fn test_aggregate_dynamic_filter_min_all_nulls() { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + let batches = vec![record_batch!(("a", Int32, [None, None, None, None])).unwrap()]; + + let min_expr = + AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("min_a") + .build() + .unwrap(); + + run_aggregate_dyn_filter_case(AggregateDynFilterCase { + schema, + batches, + aggr_exprs: vec![min_expr], + expected_before: Some("DynamicFilter [ empty ]"), + // After reading the input there is no meaningful bound to update, so the + // predicate stays `true`, which filters nothing out + expected_after: Some("DynamicFilter [ true ]"), + scan_support: true, + }) + .await; +} + +/// Test that the aggregate dynamic filter works when reading Parquet files +/// +/// Runs 'select max(id) from test_table where id > 1' and ensures that some file ranges +/// are pruned by the dynamic filter. 
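All of the aggregate dynamic-filter cases above reduce to the same bookkeeping: a partial MIN/MAX accumulator tracks the tightest bound it has seen so far and republishes a predicate that keeps only rows able to improve that bound (`a < 1 OR a > 8` after seeing `[5, 1, 3, 8]`, or `true` while no non-null value has arrived). The following is a minimal standalone sketch of that bookkeeping in plain Rust; the `Bounds` type and its methods are illustrative only and are not DataFusion's internal types.

#[derive(Clone, Copy, Debug, Default)]
struct Bounds {
    min: Option<i32>,
    max: Option<i32>,
}

impl Bounds {
    // Fold one (possibly NULL) input value into the running MIN/MAX.
    fn update(&mut self, v: Option<i32>) {
        if let Some(v) = v {
            self.min = Some(self.min.map_or(v, |m| m.min(v)));
            self.max = Some(self.max.map_or(v, |m| m.max(v)));
        }
    }

    // Render the pruning predicate a scan could apply: rows inside the current
    // [min, max] range cannot change MIN(col) or MAX(col). With no non-null
    // input there is no bound yet, so fall back to `true`.
    fn to_predicate(&self, col: &str) -> String {
        match (self.min, self.max) {
            (Some(lo), Some(hi)) => format!("{col} < {lo} OR {col} > {hi}"),
            _ => "true".to_string(),
        }
    }
}

fn main() {
    let mut bounds = Bounds::default();
    for v in [Some(5), Some(1), Some(3), Some(8)] {
        bounds.update(v);
    }
    assert_eq!(bounds.to_predicate("a"), "a < 1 OR a > 8");

    // Mirrors the all-NULL case above: no bound yet, so the filter stays `true`.
    assert_eq!(Bounds::default().to_predicate("a"), "true");
}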
+#[tokio::test] +async fn test_aggregate_dynamic_filter_parquet_e2e() { + let config = SessionConfig::new() + .with_collect_statistics(true) + .with_target_partitions(2) + .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true) + .set_bool("datafusion.execution.parquet.pushdown_filters", true); + let ctx = SessionContext::new_with_config(config); + + let data_path = format!( + "{}/tests/data/test_statistics_per_partition/", + env!("CARGO_MANIFEST_DIR") + ); + + ctx.register_parquet("test_table", &data_path, ParquetReadOptions::default()) + .await + .unwrap(); + + // partition 1: + // files: ..03-01(id=4), ..03-02(id=3) + // partition 2: + // files: ..03-03(id=2), ..03-04(id=1) + // + // In partition 1, after reading the first file, the dynamic filter will be updated + // to "id > 4", so the `..03-02` file should be pruned out + let df = ctx + .sql("explain analyze select max(id) from test_table where id > 1") + .await + .unwrap(); + + let result = df.collect().await.unwrap(); + + let formatted = pretty_format_batches(&result).unwrap(); + let explain_analyze = format!("{formatted}"); + + // Capture "2" from "files_ranges_pruned_statistics=4 total → 2 matched" + let re = Regex::new( + r"files_ranges_pruned_statistics\s*=\s*(\d+)\s*total\s*[→>\-]\s*(\d+)\s*matched", + ) + .unwrap(); + + if let Some(caps) = re.captures(&explain_analyze) { + let matched_num: i32 = caps[2].parse().unwrap(); + assert!( + matched_num < 4, + "4 files in total; if any were pruned, the matched count should be < 4" + ); + } else { + unreachable!("metrics should exist") + } +} + +/// Non-partial (Single) aggregates should skip dynamic filter initialization. +#[test] +fn test_aggregate_dynamic_filter_not_created_for_single_mode() { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()]; + + let scan = TestScanBuilder::new(Arc::clone(&schema)) + .with_support(true) + .with_batches(batches) + .build(); + + let min_expr = + AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("min_a") + .build() + .unwrap(); + + let plan: Arc = Arc::new( + AggregateExec::try_new( + AggregateMode::Single, + PhysicalGroupBy::new_single(vec![]), + vec![min_expr.into()], + vec![None], + scan, + Arc::clone(&schema), + ) + .unwrap(), + ); + + let mut config = ConfigOptions::default(); + config.execution.parquet.pushdown_filters = true; + config.optimizer.enable_dynamic_filter_pushdown = true; + + let optimized = FilterPushdown::new_post_optimization() + .optimize(plan, &config) + .unwrap(); + + let formatted = format_plan_for_test(&optimized); + assert!( + !formatted.contains("DynamicFilter ["), + "dynamic filter should not be created for AggregateMode::Single: {formatted}" + ); +} + #[tokio::test] async fn test_aggregate_filter_pushdown() { // Test that filters can pass through AggregateExec even with aggregate functions // when the filter references grouping columns // Simulates: SELECT a, COUNT(b) FROM table WHERE a = 'x' GROUP BY a - let batches = - vec![ - record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), - ]; + let batches = vec![ + record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), + ]; let scan = TestScanBuilder::new(schema()) .with_support(true) @@ -1962,10 +2417,9 @@ async fn test_no_pushdown_filter_on_aggregate_result() { // SELECT a, COUNT(b) as cnt FROM table GROUP BY a HAVING cnt > 5 // The filter on 'cnt' 
cannot be pushed down because it's an aggregate result - let batches = - vec![ - record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), - ]; + let batches = vec![ + record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), + ]; let scan = TestScanBuilder::new(schema()) .with_support(true) @@ -2034,15 +2488,14 @@ fn test_pushdown_filter_on_non_first_grouping_column() { // The filter is on 'b' (second grouping column), should push down let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; let group_by = PhysicalGroupBy::new_single(vec![ (col("a", &schema()).unwrap(), "a".to_string()), @@ -2085,15 +2538,14 @@ fn test_no_pushdown_grouping_sets_filter_on_missing_column() { // Test that filters on columns missing from some grouping sets are NOT pushed through let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; // Create GROUPING SETS with (a, b) and (b) let group_by = PhysicalGroupBy::new( @@ -2115,6 +2567,7 @@ fn test_no_pushdown_grouping_sets_filter_on_missing_column() { vec![false, false], // (a, b) - both present vec![true, false], // (b) - a is NULL, b present ], + true, ); let aggregate = Arc::new( @@ -2155,15 +2608,14 @@ fn test_pushdown_grouping_sets_filter_on_common_column() { // Test that filters on columns present in ALL grouping sets ARE pushed through let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; // Create GROUPING SETS with (a, b) and (b) let group_by = PhysicalGroupBy::new( @@ -2185,6 +2637,7 @@ fn test_pushdown_grouping_sets_filter_on_common_column() { vec![false, false], // (a, b) - both present vec![true, false], // (b) - a is NULL, b present ], + true, ); let aggregate = Arc::new( @@ -2226,15 +2679,14 @@ fn test_pushdown_with_empty_group_by() { // There are no grouping columns, so the filter should still push down let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; // Empty GROUP BY - no grouping columns let group_by = PhysicalGroupBy::new_single(vec![]); @@ -2286,15 
+2738,14 @@ fn test_pushdown_with_computed_grouping_key() { )) as Arc; let filter = Arc::new(FilterExec::try_new(predicate, scan).unwrap()); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; let c_plus_one = Arc::new(BinaryExpr::new( col("c", &schema()).unwrap(), @@ -2333,3 +2784,731 @@ fn test_pushdown_with_computed_grouping_key() { " ); } + +#[tokio::test] +async fn test_hashjoin_dynamic_filter_all_partitions_empty() { + use datafusion_common::JoinType; + use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; + + // Test scenario where all build-side partitions are empty + // This validates the code path that sets the filter to `false` when no rows can match + + // Create empty build side + let build_batches = vec![]; + let build_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + ])); + let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) + .with_support(true) + .with_batches(build_batches) + .build(); + + // Create probe side with some data + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac"]), + ("b", Utf8, ["ba", "bb", "bc"]) + ) + .unwrap(), + ]; + let probe_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + ])); + let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) + .with_support(true) + .with_batches(probe_batches) + .build(); + + // Create RepartitionExec nodes for both sides + let partition_count = 4; + + let build_hash_exprs = vec![ + col("a", &build_side_schema).unwrap(), + col("b", &build_side_schema).unwrap(), + ]; + let build_repartition = Arc::new( + RepartitionExec::try_new( + build_scan, + Partitioning::Hash(build_hash_exprs, partition_count), + ) + .unwrap(), + ); + let build_coalesce = Arc::new(CoalesceBatchesExec::new(build_repartition, 8192)); + + let probe_hash_exprs = vec![ + col("a", &probe_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ]; + let probe_repartition = Arc::new( + RepartitionExec::try_new( + Arc::clone(&probe_scan), + Partitioning::Hash(probe_hash_exprs, partition_count), + ) + .unwrap(), + ); + let probe_coalesce = Arc::new(CoalesceBatchesExec::new(probe_repartition, 8192)); + + // Create HashJoinExec + let on = vec![ + ( + col("a", &build_side_schema).unwrap(), + col("a", &probe_side_schema).unwrap(), + ), + ( + col("b", &build_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ), + ]; + let hash_join = Arc::new( + HashJoinExec::try_new( + build_coalesce, + probe_coalesce, + on, + None, + &JoinType::Inner, + None, + PartitionMode::Partitioned, + datafusion_common::NullEquality::NullEqualsNothing, + ) + .unwrap(), + ); + + let plan = + Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc; + + // Apply the filter pushdown optimizer + let mut config = SessionConfig::new(); + config.options_mut().execution.parquet.pushdown_filters = true; + let optimizer = FilterPushdown::new_post_optimization(); + let plan = optimizer.optimize(plan, config.options()).unwrap(); + + insta::assert_snapshot!( + format_plan_for_test(&plan), + @r" + - CoalesceBatchesExec: 
target_batch_size=8192 + - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] + - CoalesceBatchesExec: target_batch_size=8192 + - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true + - CoalesceBatchesExec: target_batch_size=8192 + - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + " + ); + + // Put some data through the plan to check that the filter is updated to reflect the TopK state + let session_ctx = SessionContext::new_with_config(config); + session_ctx.register_object_store( + ObjectStoreUrl::parse("test://").unwrap().as_ref(), + Arc::new(InMemory::new()), + ); + let state = session_ctx.state(); + let task_ctx = state.task_ctx(); + // Execute all partitions (required for partitioned hash join coordination) + let _batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) + .await + .unwrap(); + + // Test that filters are pushed down correctly to each side of the join + insta::assert_snapshot!( + format_plan_for_test(&plan), + @r" + - CoalesceBatchesExec: target_batch_size=8192 + - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] + - CoalesceBatchesExec: target_batch_size=8192 + - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true + - CoalesceBatchesExec: target_batch_size=8192 + - RepartitionExec: partitioning=Hash([a@0, b@1], 4), input_partitions=1 + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ false ] + " + ); +} + +#[tokio::test] +async fn test_hashjoin_dynamic_filter_with_nulls() { + use datafusion_common::JoinType; + use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; + + // Test scenario where build side has NULL values in join keys + // This validates NULL handling in bounds computation and filter generation + + // Create build side with NULL values + let build_batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, true), // nullable + Field::new("b", DataType::Int32, true), // nullable + ])), + vec![ + Arc::new(StringArray::from(vec![Some("aa"), None, Some("ab")])), + Arc::new(Int32Array::from(vec![Some(1), Some(2), None])), + ], + ) + .unwrap(); + let build_batches = vec![build_batch]; + let build_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Int32, true), + ])); + let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) + .with_support(true) + .with_batches(build_batches) + .build(); + + // Create probe side with nullable fields + let probe_batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Float64, false), + ])), + vec![ + Arc::new(StringArray::from(vec![ + Some("aa"), + Some("ab"), + Some("ac"), + None, + ])), + Arc::new(Int32Array::from(vec![Some(1), Some(3), Some(4), Some(5)])), + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])), + ], + ) + .unwrap(); + let probe_batches = vec![probe_batch]; + let probe_side_schema = 
Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Float64, false), + ])); + let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) + .with_support(true) + .with_batches(probe_batches) + .build(); + + // Create HashJoinExec in CollectLeft mode (simpler for this test) + let on = vec![ + ( + col("a", &build_side_schema).unwrap(), + col("a", &probe_side_schema).unwrap(), + ), + ( + col("b", &build_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ), + ]; + let hash_join = Arc::new( + HashJoinExec::try_new( + build_scan, + Arc::clone(&probe_scan), + on, + None, + &JoinType::Inner, + None, + PartitionMode::CollectLeft, + datafusion_common::NullEquality::NullEqualsNothing, + ) + .unwrap(), + ); + + let plan = + Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc; + + // Apply the filter pushdown optimizer + let mut config = SessionConfig::new(); + config.options_mut().execution.parquet.pushdown_filters = true; + let optimizer = FilterPushdown::new_post_optimization(); + let plan = optimizer.optimize(plan, config.options()).unwrap(); + + insta::assert_snapshot!( + format_plan_for_test(&plan), + @r" + - CoalesceBatchesExec: target_batch_size=8192 + - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] + " + ); + + // Put some data through the plan to check that the filter is updated to reflect the TopK state + let session_ctx = SessionContext::new_with_config(config); + session_ctx.register_object_store( + ObjectStoreUrl::parse("test://").unwrap().as_ref(), + Arc::new(InMemory::new()), + ); + let state = session_ctx.state(); + let task_ctx = state.task_ctx(); + // Execute all partitions (required for partitioned hash join coordination) + let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) + .await + .unwrap(); + + // Test that filters are pushed down correctly to each side of the join + insta::assert_snapshot!( + format_plan_for_test(&plan), + @r" + - CoalesceBatchesExec: target_batch_size=8192 + - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ] + " + ); + + #[rustfmt::skip] + let expected = [ + "+----+---+----+---+-----+", + "| a | b | a | b | c |", + "+----+---+----+---+-----+", + "| aa | 1 | aa | 1 | 1.0 |", + "+----+---+----+---+-----+", + ]; + assert_batches_eq!(&expected, &batches); +} + +/// Test that when hash_join_inlist_pushdown_max_size is set to a very small value, +/// the HashTable strategy is used instead of InList strategy, even with small build sides. +/// This test is identical to test_hashjoin_dynamic_filter_pushdown_partitioned except +/// for the config setting that forces the HashTable strategy. 
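Before the two HashTable-strategy tests, here is a rough sketch of the decision the `hash_join_inlist_pushdown_max_size` option controls: if the build side's join keys fit within the configured budget they are pushed down as an explicit IN-list, otherwise the pushed filter falls back to a hash-lookup against the join's hash table. The byte accounting below is purely illustrative and is not DataFusion's exact formula; only the config key name comes from the tests themselves.

enum PushdownStrategy {
    // Small build sides: push the literal key set, rendered as `IN (SET) ([...])`.
    InList(Vec<String>),
    // Large build sides (or a tiny configured budget): reuse the join's hash
    // table via a `hash_lookup` style predicate instead of materializing keys.
    HashLookup,
}

fn choose_strategy(build_keys: &[String], max_size_bytes: usize) -> PushdownStrategy {
    // Illustrative size estimate: total bytes of the key values.
    let estimated_size: usize = build_keys.iter().map(|k| k.len()).sum();
    if estimated_size <= max_size_bytes {
        PushdownStrategy::InList(build_keys.to_vec())
    } else {
        PushdownStrategy::HashLookup
    }
}

fn main() {
    let keys = vec!["aa".to_string(), "ab".to_string()];
    // A generous budget keeps the IN-list form.
    assert!(matches!(
        choose_strategy(&keys, 1024),
        PushdownStrategy::InList(_)
    ));
    // Setting the budget to 1 byte, as the tests below do, forces hash_lookup.
    assert!(matches!(
        choose_strategy(&keys, 1),
        PushdownStrategy::HashLookup
    ));
}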
+#[tokio::test] +async fn test_hashjoin_hash_table_pushdown_partitioned() { + use datafusion_common::JoinType; + use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; + + // Create build side with limited values + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; + let build_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + Field::new("c", DataType::Float64, false), + ])); + let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) + .with_support(true) + .with_batches(build_batches) + .build(); + + // Create probe side with more values + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; + let probe_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + Field::new("e", DataType::Float64, false), + ])); + let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) + .with_support(true) + .with_batches(probe_batches) + .build(); + + // Create RepartitionExec nodes for both sides with hash partitioning on join keys + let partition_count = 12; + + // Build side: DataSource -> RepartitionExec (Hash) -> CoalesceBatchesExec + let build_hash_exprs = vec![ + col("a", &build_side_schema).unwrap(), + col("b", &build_side_schema).unwrap(), + ]; + let build_repartition = Arc::new( + RepartitionExec::try_new( + build_scan, + Partitioning::Hash(build_hash_exprs, partition_count), + ) + .unwrap(), + ); + let build_coalesce = Arc::new(CoalesceBatchesExec::new(build_repartition, 8192)); + + // Probe side: DataSource -> RepartitionExec (Hash) -> CoalesceBatchesExec + let probe_hash_exprs = vec![ + col("a", &probe_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ]; + let probe_repartition = Arc::new( + RepartitionExec::try_new( + Arc::clone(&probe_scan), + Partitioning::Hash(probe_hash_exprs, partition_count), + ) + .unwrap(), + ); + let probe_coalesce = Arc::new(CoalesceBatchesExec::new(probe_repartition, 8192)); + + // Create HashJoinExec with partitioned inputs + let on = vec![ + ( + col("a", &build_side_schema).unwrap(), + col("a", &probe_side_schema).unwrap(), + ), + ( + col("b", &build_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ), + ]; + let hash_join = Arc::new( + HashJoinExec::try_new( + build_coalesce, + probe_coalesce, + on, + None, + &JoinType::Inner, + None, + PartitionMode::Partitioned, + datafusion_common::NullEquality::NullEqualsNothing, + ) + .unwrap(), + ); + + // Top-level CoalesceBatchesExec + let cb = + Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc; + // Top-level CoalescePartitionsExec + let cp = Arc::new(CoalescePartitionsExec::new(cb)) as Arc; + // Add a sort for deterministic output + let plan = Arc::new(SortExec::new( + LexOrdering::new(vec![PhysicalSortExpr::new( + col("a", &probe_side_schema).unwrap(), + SortOptions::new(true, false), // descending, nulls_first + )]) + .unwrap(), + cp, + )) as Arc; + + // Apply the optimization with config setting that forces HashTable strategy + let session_config = SessionConfig::default() + .with_batch_size(10) + .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1) + 
.set_bool("datafusion.execution.parquet.pushdown_filters", true) + .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true); + let plan = FilterPushdown::new_post_optimization() + .optimize(plan, session_config.options()) + .unwrap(); + let session_ctx = SessionContext::new_with_config(session_config); + session_ctx.register_object_store( + ObjectStoreUrl::parse("test://").unwrap().as_ref(), + Arc::new(InMemory::new()), + ); + let state = session_ctx.state(); + let task_ctx = state.task_ctx(); + let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) + .await + .unwrap(); + + // Verify that hash_lookup is used instead of IN (SET) + let plan_str = format_plan_for_test(&plan).to_string(); + assert!( + plan_str.contains("hash_lookup"), + "Expected hash_lookup in plan but got: {plan_str}" + ); + assert!( + !plan_str.contains("IN (SET)"), + "Expected no IN (SET) in plan but got: {plan_str}" + ); + + let result = format!("{}", pretty_format_batches(&batches).unwrap()); + + let probe_scan_metrics = probe_scan.metrics().unwrap(); + + // The probe side had 4 rows, but after applying the dynamic filter only 2 rows should remain. + assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2); + + // Results should be identical to the InList version + insta::assert_snapshot!( + result, + @r" + +----+----+-----+----+----+-----+ + | a | b | c | a | b | e | + +----+----+-----+----+----+-----+ + | ab | bb | 2.0 | ab | bb | 2.0 | + | aa | ba | 1.0 | aa | ba | 1.0 | + +----+----+-----+----+----+-----+ + ", + ); +} + +/// Test that when hash_join_inlist_pushdown_max_size is set to a very small value, +/// the HashTable strategy is used instead of InList strategy in CollectLeft mode. +/// This test is identical to test_hashjoin_dynamic_filter_pushdown_collect_left except +/// for the config setting that forces the HashTable strategy. 
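Whichever form is pushed down (`IN (SET)` or `hash_lookup`), the effect on the probe side is the same: a row survives only if its join-key tuple occurs on the build side, which is why these tests expect exactly 2 of the 4 probe rows to reach the output. A self-contained illustration of that composite-key membership check, using plain Rust collections and the same illustrative key values as the tests:

use std::collections::HashSet;

fn main() {
    // Build-side join keys (a, b).
    let build_keys: HashSet<(&str, &str)> =
        [("aa", "ba"), ("ab", "bb")].into_iter().collect();

    // Probe-side rows keyed by the same columns.
    let probe_rows = [("aa", "ba"), ("ab", "bb"), ("ac", "bc"), ("ad", "bd")];

    // Keep only probe rows whose key tuple exists on the build side.
    let matched: Vec<_> = probe_rows
        .iter()
        .filter(|row| build_keys.contains(*row))
        .collect();

    // 2 of 4 probe rows survive, matching the `output_rows() == 2` assertions.
    assert_eq!(matched.len(), 2);
}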
+#[tokio::test] +async fn test_hashjoin_hash_table_pushdown_collect_left() { + use datafusion_common::JoinType; + use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; + + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; + let build_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + Field::new("c", DataType::Float64, false), + ])); + let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) + .with_support(true) + .with_batches(build_batches) + .build(); + + // Create probe side with more values + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; + let probe_side_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + Field::new("e", DataType::Float64, false), + ])); + let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) + .with_support(true) + .with_batches(probe_batches) + .build(); + + // Create RepartitionExec nodes for both sides with hash partitioning on join keys + let partition_count = 12; + + // Probe side: DataSource -> RepartitionExec(Hash) -> CoalesceBatchesExec + let probe_hash_exprs = vec![ + col("a", &probe_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ]; + let probe_repartition = Arc::new( + RepartitionExec::try_new( + Arc::clone(&probe_scan), + Partitioning::Hash(probe_hash_exprs, partition_count), // create multi partitions on probSide + ) + .unwrap(), + ); + let probe_coalesce = Arc::new(CoalesceBatchesExec::new(probe_repartition, 8192)); + + let on = vec![ + ( + col("a", &build_side_schema).unwrap(), + col("a", &probe_side_schema).unwrap(), + ), + ( + col("b", &build_side_schema).unwrap(), + col("b", &probe_side_schema).unwrap(), + ), + ]; + let hash_join = Arc::new( + HashJoinExec::try_new( + build_scan, + probe_coalesce, + on, + None, + &JoinType::Inner, + None, + PartitionMode::CollectLeft, + datafusion_common::NullEquality::NullEqualsNothing, + ) + .unwrap(), + ); + + // Top-level CoalesceBatchesExec + let cb = + Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc; + // Top-level CoalescePartitionsExec + let cp = Arc::new(CoalescePartitionsExec::new(cb)) as Arc; + // Add a sort for deterministic output + let plan = Arc::new(SortExec::new( + LexOrdering::new(vec![PhysicalSortExpr::new( + col("a", &probe_side_schema).unwrap(), + SortOptions::new(true, false), // descending, nulls_first + )]) + .unwrap(), + cp, + )) as Arc; + + // Apply the optimization with config setting that forces HashTable strategy + let session_config = SessionConfig::default() + .with_batch_size(10) + .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1) + .set_bool("datafusion.execution.parquet.pushdown_filters", true) + .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true); + let plan = FilterPushdown::new_post_optimization() + .optimize(plan, session_config.options()) + .unwrap(); + let session_ctx = SessionContext::new_with_config(session_config); + session_ctx.register_object_store( + ObjectStoreUrl::parse("test://").unwrap().as_ref(), + Arc::new(InMemory::new()), + ); + let state = session_ctx.state(); + let task_ctx = state.task_ctx(); + let 
batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) + .await + .unwrap(); + + // Verify that hash_lookup is used instead of IN (SET) + let plan_str = format_plan_for_test(&plan).to_string(); + assert!( + plan_str.contains("hash_lookup"), + "Expected hash_lookup in plan but got: {plan_str}" + ); + assert!( + !plan_str.contains("IN (SET)"), + "Expected no IN (SET) in plan but got: {plan_str}" + ); + + let result = format!("{}", pretty_format_batches(&batches).unwrap()); + + let probe_scan_metrics = probe_scan.metrics().unwrap(); + + // The probe side had 4 rows, but after applying the dynamic filter only 2 rows should remain. + assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2); + + // Results should be identical to the InList version + insta::assert_snapshot!( + result, + @r" + +----+----+-----+----+----+-----+ + | a | b | c | a | b | e | + +----+----+-----+----+----+-----+ + | ab | bb | 2.0 | ab | bb | 2.0 | + | aa | ba | 1.0 | aa | ba | 1.0 | + +----+----+-----+----+----+-----+ + ", + ); +} + +/// Test HashTable strategy with integer multi-column join keys. +/// Verifies that hash_lookup works correctly with integer data types. +#[tokio::test] +async fn test_hashjoin_hash_table_pushdown_integer_keys() { + use datafusion_common::JoinType; + use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; + + // Create build side with integer keys + let build_batches = vec![ + record_batch!( + ("id1", Int32, [1, 2]), + ("id2", Int32, [10, 20]), + ("value", Float64, [100.0, 200.0]) + ) + .unwrap(), + ]; + let build_side_schema = Arc::new(Schema::new(vec![ + Field::new("id1", DataType::Int32, false), + Field::new("id2", DataType::Int32, false), + Field::new("value", DataType::Float64, false), + ])); + let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema)) + .with_support(true) + .with_batches(build_batches) + .build(); + + // Create probe side with more integer rows + let probe_batches = vec![ + record_batch!( + ("id1", Int32, [1, 2, 3, 4]), + ("id2", Int32, [10, 20, 30, 40]), + ("data", Utf8, ["a", "b", "c", "d"]) + ) + .unwrap(), + ]; + let probe_side_schema = Arc::new(Schema::new(vec![ + Field::new("id1", DataType::Int32, false), + Field::new("id2", DataType::Int32, false), + Field::new("data", DataType::Utf8, false), + ])); + let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema)) + .with_support(true) + .with_batches(probe_batches) + .build(); + + // Create join on multiple integer columns + let on = vec![ + ( + col("id1", &build_side_schema).unwrap(), + col("id1", &probe_side_schema).unwrap(), + ), + ( + col("id2", &build_side_schema).unwrap(), + col("id2", &probe_side_schema).unwrap(), + ), + ]; + let hash_join = Arc::new( + HashJoinExec::try_new( + build_scan, + Arc::clone(&probe_scan), + on, + None, + &JoinType::Inner, + None, + PartitionMode::CollectLeft, + datafusion_common::NullEquality::NullEqualsNothing, + ) + .unwrap(), + ); + + let plan = + Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc; + + // Apply optimization with forced HashTable strategy + let session_config = SessionConfig::default() + .with_batch_size(10) + .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1) + .set_bool("datafusion.execution.parquet.pushdown_filters", true) + .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true); + let plan = FilterPushdown::new_post_optimization() + .optimize(plan, session_config.options()) + .unwrap(); + let session_ctx = SessionContext::new_with_config(session_config); + 
session_ctx.register_object_store( + ObjectStoreUrl::parse("test://").unwrap().as_ref(), + Arc::new(InMemory::new()), + ); + let state = session_ctx.state(); + let task_ctx = state.task_ctx(); + let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx)) + .await + .unwrap(); + + // Verify hash_lookup is used + let plan_str = format_plan_for_test(&plan).to_string(); + assert!( + plan_str.contains("hash_lookup"), + "Expected hash_lookup in plan but got: {plan_str}" + ); + assert!( + !plan_str.contains("IN (SET)"), + "Expected no IN (SET) in plan but got: {plan_str}" + ); + + let result = format!("{}", pretty_format_batches(&batches).unwrap()); + + let probe_scan_metrics = probe_scan.metrics().unwrap(); + // Only 2 rows from probe side match the build side + assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2); + + insta::assert_snapshot!( + result, + @r" + +-----+-----+-------+-----+-----+------+ + | id1 | id2 | value | id1 | id2 | data | + +-----+-----+-------+-----+-----+------+ + | 1 | 10 | 100.0 | 1 | 10 | a | + | 2 | 20 | 200.0 | 2 | 20 | b | + +-----+-----+-------+-----+-----+------+ + ", + ); +} diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs index 7d8a9c7c2125c..1afdc4823f0a4 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs @@ -18,27 +18,24 @@ use arrow::datatypes::SchemaRef; use arrow::{array::RecordBatch, compute::concat_batches}; use datafusion::{datasource::object_store::ObjectStoreUrl, physical_plan::PhysicalExpr}; -use datafusion_common::{config::ConfigOptions, internal_err, Result, Statistics}; +use datafusion_common::{Result, config::ConfigOptions, internal_err}; use datafusion_datasource::{ - file::FileSource, file_scan_config::FileScanConfig, + PartitionedFile, file::FileSource, file_scan_config::FileScanConfig, file_scan_config::FileScanConfigBuilder, file_stream::FileOpenFuture, - file_stream::FileOpener, schema_adapter::DefaultSchemaAdapterFactory, - schema_adapter::SchemaAdapterFactory, source::DataSourceExec, PartitionedFile, - TableSchema, + file_stream::FileOpener, source::DataSourceExec, }; use datafusion_physical_expr_common::physical_expr::fmt_sql; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::filter::batch_filter; use datafusion_physical_plan::filter_pushdown::{FilterPushdownPhase, PushedDown}; use datafusion_physical_plan::{ - displayable, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, displayable, filter::FilterExec, filter_pushdown::{ ChildFilterDescription, ChildPushdownResult, FilterDescription, FilterPushdownPropagation, }, metrics::ExecutionPlanMetricsSet, - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, }; use futures::StreamExt; use futures::{FutureExt, Stream}; @@ -53,7 +50,6 @@ use std::{ pub struct TestOpener { batches: Vec, batch_size: Option, - schema: Option, projection: Option>, predicate: Option>, } @@ -61,6 +57,9 @@ pub struct TestOpener { impl FileOpener for TestOpener { fn open(&self, _partitioned_file: PartitionedFile) -> Result { let mut batches = self.batches.clone(); + if self.batches.is_empty() { + return Ok((async { Ok(TestStream::new(vec![]).boxed()) }).boxed()); + } if let Some(batch_size) = self.batch_size { let batch = concat_batches(&batches[0].schema(), &batches)?; let mut new_batches = Vec::new(); @@ -71,23 +70,18 @@ impl FileOpener for TestOpener { } 
batches = new_batches.into_iter().collect(); } - if let Some(schema) = &self.schema { - let factory = DefaultSchemaAdapterFactory::from_schema(Arc::clone(schema)); - let (mapper, projection) = factory.map_schema(&batches[0].schema()).unwrap(); - let mut new_batches = Vec::new(); - for batch in batches { - let batch = if let Some(predicate) = &self.predicate { - batch_filter(&batch, predicate)? - } else { - batch - }; - let batch = batch.project(&projection).unwrap(); - let batch = mapper.map_batch(batch).unwrap(); - new_batches.push(batch); - } - batches = new_batches; + let mut new_batches = Vec::new(); + for batch in batches { + let batch = if let Some(predicate) = &self.predicate { + batch_filter(&batch, predicate)? + } else { + batch + }; + new_batches.push(batch); } + batches = new_batches; + if let Some(projection) = &self.projection { batches = batches .into_iter() @@ -102,26 +96,29 @@ impl FileOpener for TestOpener { } /// A placeholder data source that accepts filter pushdown -#[derive(Clone, Default)] +#[derive(Clone)] pub struct TestSource { support: bool, predicate: Option>, - statistics: Option, batch_size: Option, batches: Vec, - schema: Option, metrics: ExecutionPlanMetricsSet, projection: Option>, - schema_adapter_factory: Option>, + table_schema: datafusion_datasource::TableSchema, } impl TestSource { - pub fn new(support: bool, batches: Vec) -> Self { + pub fn new(schema: SchemaRef, support: bool, batches: Vec) -> Self { + let table_schema = + datafusion_datasource::TableSchema::new(Arc::clone(&schema), vec![]); Self { support, metrics: ExecutionPlanMetricsSet::new(), batches, - ..Default::default() + predicate: None, + batch_size: None, + projection: None, + table_schema, } } } @@ -132,14 +129,13 @@ impl FileSource for TestSource { _object_store: Arc, _base_config: &FileScanConfig, _partition: usize, - ) -> Arc { - Arc::new(TestOpener { + ) -> Result> { + Ok(Arc::new(TestOpener { batches: self.batches.clone(), batch_size: self.batch_size, - schema: self.schema.clone(), projection: self.projection.clone(), predicate: self.predicate.clone(), - }) + })) } fn filter(&self) -> Option> { @@ -157,43 +153,10 @@ impl FileSource for TestSource { }) } - fn with_schema(&self, schema: TableSchema) -> Arc { - assert!( - schema.table_partition_cols().is_empty(), - "TestSource does not support partition columns" - ); - Arc::new(TestSource { - schema: Some(schema.file_schema().clone()), - ..self.clone() - }) - } - - fn with_projection(&self, config: &FileScanConfig) -> Arc { - Arc::new(TestSource { - projection: config.projection_exprs.as_ref().map(|p| p.column_indices()), - ..self.clone() - }) - } - - fn with_statistics(&self, statistics: Statistics) -> Arc { - Arc::new(TestSource { - statistics: Some(statistics), - ..self.clone() - }) - } - fn metrics(&self) -> &ExecutionPlanMetricsSet { &self.metrics } - fn statistics(&self) -> Result { - Ok(self - .statistics - .as_ref() - .expect("statistics not set") - .clone()) - } - fn file_type(&self) -> &str { "test" } @@ -247,18 +210,8 @@ impl FileSource for TestSource { } } - fn with_schema_adapter_factory( - &self, - schema_adapter_factory: Arc, - ) -> Result> { - Ok(Arc::new(Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self.clone() - })) - } - - fn schema_adapter_factory(&self) -> Option> { - self.schema_adapter_factory.clone() + fn table_schema(&self) -> &datafusion_datasource::TableSchema { + &self.table_schema } } @@ -289,14 +242,15 @@ impl TestScanBuilder { } pub fn build(self) -> Arc { - let source = 
Arc::new(TestSource::new(self.support, self.batches)); - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test://").unwrap(), + let source = Arc::new(TestSource::new( Arc::clone(&self.schema), - source, - ) - .with_file(PartitionedFile::new("test.parquet", 123)) - .build(); + self.support, + self.batches, + )); + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test://").unwrap(), source) + .with_file(PartitionedFile::new("test.parquet", 123)) + .build(); DataSourceExec::from_data_source(base_config) } } @@ -335,11 +289,12 @@ impl TestStream { /// least one entry in data (for the schema) pub fn new(data: Vec) -> Self { // check that there is at least one entry in data and that all batches have the same schema - assert!(!data.is_empty(), "data must not be empty"); - assert!( - data.iter().all(|batch| batch.schema() == data[0].schema()), - "all batches must have the same schema" - ); + if let Some(first) = data.first() { + assert!( + data.iter().all(|batch| batch.schema() == first.schema()), + "all batches must have the same schema" + ); + } Self { data, ..Default::default() diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs index f9d3a045469e1..37bcefd418bdb 100644 --- a/datafusion/core/tests/physical_optimizer/join_selection.rs +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -26,27 +26,27 @@ use std::{ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::config::ConfigOptions; -use datafusion_common::{stats::Precision, ColumnStatistics, JoinType, ScalarValue}; +use datafusion_common::{ColumnStatistics, JoinType, ScalarValue, stats::Precision}; use datafusion_common::{JoinSide, NullEquality}; use datafusion_common::{Result, Statistics}; use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; use datafusion_expr::Operator; +use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::expressions::{BinaryExpr, Column, NegativeExpr}; use datafusion_physical_expr::intervals::utils::check_support; -use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; -use datafusion_physical_optimizer::join_selection::JoinSelection; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::join_selection::JoinSelection; +use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_physical_plan::displayable; use datafusion_physical_plan::joins::utils::ColumnIndex; use datafusion_physical_plan::joins::utils::JoinFilter; use datafusion_physical_plan::joins::{HashJoinExec, NestedLoopJoinExec, PartitionMode}; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_physical_plan::{ - execution_plan::{Boundedness, EmissionType}, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + execution_plan::{Boundedness, EmissionType}, }; use futures::Stream; @@ -949,10 +949,10 @@ impl Stream for UnboundedStream { mut self: Pin<&mut Self>, _cx: &mut Context<'_>, ) -> Poll> { - if let Some(val) = self.batch_produce { - if val <= self.count { - return Poll::Ready(None); - } + if let Some(val) = self.batch_produce + && val <= self.count + { + return Poll::Ready(None); } self.count += 1; Poll::Ready(Some(Ok(self.batch.clone()))) 
@@ -1088,9 +1088,10 @@ pub struct StatisticsExec { impl StatisticsExec { pub fn new(stats: Statistics, schema: Schema) -> Self { assert_eq!( - stats.column_statistics.len(), schema.fields().len(), - "if defined, the column statistics vector length should be the number of fields" - ); + stats.column_statistics.len(), + schema.fields().len(), + "if defined, the column statistics vector length should be the number of fields" + ); let cache = Self::compute_properties(Arc::new(schema.clone())); Self { stats, diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs index 56d48901f284d..b32a9bbd25432 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -27,16 +27,16 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::{col, lit, BinaryExpr}; use datafusion_physical_expr::Partitioning; +use datafusion_physical_expr::expressions::{BinaryExpr, col, lit}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::{get_plan_string, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, get_plan_string}; fn create_schema() -> SchemaRef { Arc::new(Schema::new(vec![ @@ -96,51 +96,51 @@ fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero() -> let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" - ]; + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) } #[test] -fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( -) -> Result<()> { +fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero() +-> Result<()> { let schema = create_schema(); let streaming_table = stream_exec(&schema); let global_limit = global_limit_exec(streaming_table, 2, Some(5)); let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = 
LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" - ]; + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) } #[test] -fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( -) -> Result<()> { +fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit() +-> Result<()> { let schema = create_schema(); let streaming_table = stream_exec(&schema); let repartition = repartition_exec(streaming_table)?; @@ -152,14 +152,14 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = @@ -170,8 +170,8 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi " CoalesceBatchesExec: target_batch_size=8192, fetch=5", " FilterExec: c3@2 > 0", " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -187,30 +187,29 @@ fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " GlobalLimitExec: skip=0, fetch=5", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) } #[test] -fn 
pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( -) -> Result<()> { +fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version() +-> Result<()> { let schema = create_schema(); let streaming_table = stream_exec(&schema); let coalesce_batches = coalesce_batches_exec(streaming_table, 8192); @@ -219,11 +218,11 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); @@ -231,10 +230,10 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -258,14 +257,14 @@ fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " SortPreservingMergeExec: [c1@0 ASC]", - " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " SortPreservingMergeExec: [c1@0 ASC]", + " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); @@ -273,13 +272,13 @@ fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "SortPreservingMergeExec: [c1@0 ASC], fetch=5", - " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "SortPreservingMergeExec: [c1@0 ASC], fetch=5", + " SortExec: TopK(fetch=5), expr=[c1@0 ASC], 
preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -297,23 +296,23 @@ fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions() -> R let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "CoalescePartitionsExec: fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "CoalescePartitionsExec: fetch=5", + " FilterExec: c3@2 > 0, fetch=5", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index ad15d6803413b..c523b4a752a82 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -21,8 +21,8 @@ use insta::assert_snapshot; use std::sync::Arc; use crate::physical_optimizer::test_utils::{ - build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, schema, - TestAggregate, + TestAggregate, build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, + schema, }; use arrow::datatypes::DataType; @@ -34,10 +34,10 @@ use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{self, cast, col}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::{ + ExecutionPlan, aggregates::{AggregateExec, AggregateMode}, collect, limit::{GlobalLimitExec, LocalLimitExec}, - ExecutionPlan, }; async fn run_plan_and_format(plan: Arc) -> Result { diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 936c02eb2a02d..d11322cd26be9 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -17,18 +17,24 @@ //! 
Physical Optimizer integration tests +#[expect(clippy::needless_pass_by_value)] mod aggregate_statistics; mod combine_partial_final_agg; +#[expect(clippy::needless_pass_by_value)] mod enforce_distribution; mod enforce_sorting; mod enforce_sorting_monotonicity; +#[expect(clippy::needless_pass_by_value)] mod filter_pushdown; mod join_selection; +#[expect(clippy::needless_pass_by_value)] mod limit_pushdown; mod limited_distinct_aggregation; mod partition_statistics; mod projection_pushdown; +mod pushdown_sort; mod replace_with_order_preserving_variants; mod sanity_checker; +#[expect(clippy::needless_pass_by_value)] mod test_utils; mod window_optimize; diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index 49dc5b845605d..468d25e0e57d0 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -25,16 +25,16 @@ mod test { use datafusion::datasource::listing::ListingTable; use datafusion::prelude::SessionContext; use datafusion_catalog::TableProvider; - use datafusion_common::stats::Precision; use datafusion_common::Result; + use datafusion_common::stats::Precision; use datafusion_common::{ColumnStatistics, ScalarValue, Statistics}; - use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; + use datafusion_execution::config::SessionConfig; use datafusion_expr_common::operator::Operator; use datafusion_functions_aggregate::count::count_udaf; - use datafusion_physical_expr::aggregate::AggregateExprBuilder; - use datafusion_physical_expr::expressions::{binary, col, lit, Column}; use datafusion_physical_expr::Partitioning; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; + use datafusion_physical_expr::expressions::{Column, binary, col, lit}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::aggregates::{ @@ -53,8 +53,8 @@ mod test { use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::union::{InterleaveExec, UnionExec}; use datafusion_physical_plan::{ - execute_stream_partitioned, get_plan_string, ExecutionPlan, - ExecutionPlanProperties, + ExecutionPlan, ExecutionPlanProperties, execute_stream_partitioned, + get_plan_string, }; use futures::TryStreamExt; @@ -67,7 +67,7 @@ mod test { /// - Each partition has an "id" column (INT) with the following values: /// - First partition: [3, 4] /// - Second partition: [1, 2] - /// - Each row is 110 bytes in size + /// - Each partition has 16 bytes total (Int32 id: 4 bytes × 2 rows + Date32 date: 4 bytes × 2 rows) /// /// @param create_table_sql Optional parameter to set the create table SQL /// @param target_partition Optional parameter to set the target partitions @@ -112,29 +112,51 @@ mod test { .unwrap() } + // Date32 values for test data (days since 1970-01-01): + // 2025-03-01 = 20148 + // 2025-03-02 = 20149 + // 2025-03-03 = 20150 + // 2025-03-04 = 20151 + const DATE_2025_03_01: i32 = 20148; + const DATE_2025_03_02: i32 = 20149; + const DATE_2025_03_03: i32 = 20150; + const DATE_2025_03_04: i32 = 20151; + /// Helper function to create expected statistics for a partition with Int32 column + /// + /// If `date_range` is provided, includes exact statistics for the partition date column. + /// Partition column statistics are exact because all rows in a partition share the same value. 
fn create_partition_statistics( num_rows: usize, total_byte_size: usize, min_value: i32, max_value: i32, - include_date_column: bool, + date_range: Option<(i32, i32)>, ) -> Statistics { + // Int32 is 4 bytes per row + let int32_byte_size = num_rows * 4; let mut column_stats = vec![ColumnStatistics { null_count: Precision::Exact(0), max_value: Precision::Exact(ScalarValue::Int32(Some(max_value))), min_value: Precision::Exact(ScalarValue::Int32(Some(min_value))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Exact(int32_byte_size), }]; - if include_date_column { + if let Some((min_date, max_date)) = date_range { + // Partition column stats are computed from partition values: + // - null_count = 0 (partition values from paths are never null) + // - min/max are the merged partition values across files in the group + // - byte_size = num_rows * 4 (Date32 is 4 bytes per row) + let date32_byte_size = num_rows * 4; column_stats.push(ColumnStatistics { - null_count: Precision::Absent, - max_value: Precision::Absent, - min_value: Precision::Absent, + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Date32(Some(max_date))), + min_value: Precision::Exact(ScalarValue::Date32(Some(min_date))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Exact(date32_byte_size), }); } @@ -214,10 +236,22 @@ mod test { let statistics = (0..scan.output_partitioning().partition_count()) .map(|idx| scan.partition_statistics(Some(idx))) .collect::>>()?; - let expected_statistic_partition_1 = - create_partition_statistics(2, 110, 3, 4, true); - let expected_statistic_partition_2 = - create_partition_statistics(2, 110, 1, 2, true); + // Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02] + let expected_statistic_partition_1 = create_partition_statistics( + 2, + 16, + 3, + 4, + Some((DATE_2025_03_01, DATE_2025_03_02)), + ); + // Partition 2: ids [1,2], dates [2025-03-03, 2025-03-04] + let expected_statistic_partition_2 = create_partition_statistics( + 2, + 16, + 1, + 2, + Some((DATE_2025_03_03, DATE_2025_03_04)), + ); // Check the statistics of each partition assert_eq!(statistics.len(), 2); assert_eq!(statistics[0], expected_statistic_partition_1); @@ -246,10 +280,11 @@ mod test { let statistics = (0..projection.output_partitioning().partition_count()) .map(|idx| projection.partition_statistics(Some(idx))) .collect::>>()?; + // Projection only includes id column, not the date partition column let expected_statistic_partition_1 = - create_partition_statistics(2, 8, 3, 4, false); + create_partition_statistics(2, 8, 3, 4, None); let expected_statistic_partition_2 = - create_partition_statistics(2, 8, 1, 2, false); + create_partition_statistics(2, 8, 1, 2, None); // Check the statistics of each partition assert_eq!(statistics.len(), 2); assert_eq!(statistics[0], expected_statistic_partition_1); @@ -277,8 +312,14 @@ mod test { let statistics = (0..sort_exec.output_partitioning().partition_count()) .map(|idx| sort_exec.partition_statistics(Some(idx))) .collect::>>()?; - let expected_statistic_partition = - create_partition_statistics(4, 220, 1, 4, true); + // All 4 files merged: ids [1-4], dates [2025-03-01, 2025-03-04] + let expected_statistic_partition = create_partition_statistics( + 4, + 32, + 1, + 4, + Some((DATE_2025_03_01, DATE_2025_03_04)), + ); assert_eq!(statistics.len(), 1); assert_eq!(statistics[0], expected_statistic_partition); // Check the statistics_by_partition with real results @@ -291,10 +332,22 @@ 
mod test { let sort_exec: Arc = Arc::new( SortExec::new(ordering.into(), scan_2).with_preserve_partitioning(true), ); - let expected_statistic_partition_1 = - create_partition_statistics(2, 110, 3, 4, true); - let expected_statistic_partition_2 = - create_partition_statistics(2, 110, 1, 2, true); + // Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02] + let expected_statistic_partition_1 = create_partition_statistics( + 2, + 16, + 3, + 4, + Some((DATE_2025_03_01, DATE_2025_03_02)), + ); + // Partition 2: ids [1,2], dates [2025-03-03, 2025-03-04] + let expected_statistic_partition_2 = create_partition_statistics( + 2, + 16, + 1, + 2, + Some((DATE_2025_03_03, DATE_2025_03_04)), + ); let statistics = (0..sort_exec.output_partitioning().partition_count()) .map(|idx| sort_exec.partition_statistics(Some(idx))) .collect::>>()?; @@ -324,6 +377,8 @@ mod test { let filter: Arc = Arc::new(FilterExec::try_new(predicate, scan)?); let full_statistics = filter.partition_statistics(None)?; + // Filter preserves original total_rows and byte_size from input + // (4 total rows = 2 partitions * 2 rows each, byte_size = 4 * 4 = 16 bytes for int32) let expected_full_statistic = Statistics { num_rows: Precision::Inexact(0), total_byte_size: Precision::Inexact(0), @@ -334,6 +389,7 @@ mod test { min_value: Precision::Exact(ScalarValue::Null), sum_value: Precision::Exact(ScalarValue::Null), distinct_count: Precision::Exact(0), + byte_size: Precision::Exact(16), }, ColumnStatistics { null_count: Precision::Exact(0), @@ -341,6 +397,7 @@ mod test { min_value: Precision::Exact(ScalarValue::Null), sum_value: Precision::Exact(ScalarValue::Null), distinct_count: Precision::Exact(0), + byte_size: Precision::Exact(16), // 4 rows * 4 bytes (Date32) }, ], }; @@ -350,8 +407,31 @@ mod test { .map(|idx| filter.partition_statistics(Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); - assert_eq!(statistics[0], expected_full_statistic); - assert_eq!(statistics[1], expected_full_statistic); + // Per-partition stats: each partition has 2 rows, byte_size = 2 * 4 = 8 + let expected_partition_statistic = Statistics { + num_rows: Precision::Inexact(0), + total_byte_size: Precision::Inexact(0), + column_statistics: vec![ + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Null), + min_value: Precision::Exact(ScalarValue::Null), + sum_value: Precision::Exact(ScalarValue::Null), + distinct_count: Precision::Exact(0), + byte_size: Precision::Exact(8), + }, + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Null), + min_value: Precision::Exact(ScalarValue::Null), + sum_value: Precision::Exact(ScalarValue::Null), + distinct_count: Precision::Exact(0), + byte_size: Precision::Exact(8), // 2 rows * 4 bytes (Date32) + }, + ], + }; + assert_eq!(statistics[0], expected_partition_statistic); + assert_eq!(statistics[1], expected_partition_statistic); Ok(()) } @@ -365,10 +445,22 @@ mod test { .collect::>>()?; // Check that we have 4 partitions (2 from each scan) assert_eq!(statistics.len(), 4); - let expected_statistic_partition_1 = - create_partition_statistics(2, 110, 3, 4, true); - let expected_statistic_partition_2 = - create_partition_statistics(2, 110, 1, 2, true); + // Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02] + let expected_statistic_partition_1 = create_partition_statistics( + 2, + 16, + 3, + 4, + Some((DATE_2025_03_01, DATE_2025_03_02)), + ); + // Partition 2: ids [1,2], dates [2025-03-03, 2025-03-04] + let 
expected_statistic_partition_2 = create_partition_statistics( + 2, + 16, + 1, + 2, + Some((DATE_2025_03_03, DATE_2025_03_04)), + ); // Verify first partition (from first scan) assert_eq!(statistics[0], expected_statistic_partition_1); // Verify second partition (from first scan) @@ -416,9 +508,10 @@ mod test { .collect::>>()?; assert_eq!(stats.len(), 2); + // Each partition gets half of combined input, total_rows per partition = 4 let expected_stats = Statistics { num_rows: Precision::Inexact(4), - total_byte_size: Precision::Inexact(220), + total_byte_size: Precision::Inexact(32), column_statistics: vec![ ColumnStatistics::new_unknown(), ColumnStatistics::new_unknown(), @@ -461,28 +554,76 @@ mod test { .collect::>>()?; // Check that we have 2 partitions assert_eq!(statistics.len(), 2); - let mut expected_statistic_partition_1 = - create_partition_statistics(8, 48400, 1, 4, true); - expected_statistic_partition_1 - .column_statistics - .push(ColumnStatistics { - null_count: Precision::Exact(0), - max_value: Precision::Exact(ScalarValue::Int32(Some(4))), - min_value: Precision::Exact(ScalarValue::Int32(Some(3))), - sum_value: Precision::Absent, - distinct_count: Precision::Absent, - }); - let mut expected_statistic_partition_2 = - create_partition_statistics(8, 48400, 1, 4, true); - expected_statistic_partition_2 - .column_statistics - .push(ColumnStatistics { - null_count: Precision::Exact(0), - max_value: Precision::Exact(ScalarValue::Int32(Some(2))), - min_value: Precision::Exact(ScalarValue::Int32(Some(1))), - sum_value: Precision::Absent, - distinct_count: Precision::Absent, - }); + // Cross join output schema: [left.id, left.date, right.id] + // Cross join doesn't propagate Column's byte_size + let expected_statistic_partition_1 = Statistics { + num_rows: Precision::Exact(8), + total_byte_size: Precision::Exact(512), + column_statistics: vec![ + // column 0: left.id (Int32, file column from t1) + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Int32(Some(4))), + min_value: Precision::Exact(ScalarValue::Int32(Some(1))), + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }, + // column 1: left.date (Date32, partition column from t1) + // Partition column statistics are exact because all rows in a partition share the same value. 
+ ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Date32(Some(20151))), + min_value: Precision::Exact(ScalarValue::Date32(Some(20148))), + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }, + // column 2: right.id (Int32, file column from t2) - right partition 0: ids [3,4] + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Int32(Some(4))), + min_value: Precision::Exact(ScalarValue::Int32(Some(3))), + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }, + ], + }; + let expected_statistic_partition_2 = Statistics { + num_rows: Precision::Exact(8), + total_byte_size: Precision::Exact(512), + column_statistics: vec![ + // column 0: left.id (Int32, file column from t1) + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Int32(Some(4))), + min_value: Precision::Exact(ScalarValue::Int32(Some(1))), + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }, + // column 1: left.date (Date32, partition column from t1) + // Partition column statistics are exact because all rows in a partition share the same value. + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Date32(Some(20151))), + min_value: Precision::Exact(ScalarValue::Date32(Some(20148))), + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }, + // column 2: right.id (Int32, file column from t2) - right partition 1: ids [1,2] + ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Exact(ScalarValue::Int32(Some(2))), + min_value: Precision::Exact(ScalarValue::Int32(Some(1))), + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }, + ], + }; assert_eq!(statistics[0], expected_statistic_partition_1); assert_eq!(statistics[1], expected_statistic_partition_2); @@ -500,10 +641,22 @@ mod test { let scan = create_scan_exec_with_statistics(None, Some(2)).await; let coalesce_batches: Arc = Arc::new(CoalesceBatchesExec::new(scan, 2)); - let expected_statistic_partition_1 = - create_partition_statistics(2, 110, 3, 4, true); - let expected_statistic_partition_2 = - create_partition_statistics(2, 110, 1, 2, true); + // Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02] + let expected_statistic_partition_1 = create_partition_statistics( + 2, + 16, + 3, + 4, + Some((DATE_2025_03_01, DATE_2025_03_02)), + ); + // Partition 2: ids [1,2], dates [2025-03-03, 2025-03-04] + let expected_statistic_partition_2 = create_partition_statistics( + 2, + 16, + 1, + 2, + Some((DATE_2025_03_03, DATE_2025_03_04)), + ); let statistics = (0..coalesce_batches.output_partitioning().partition_count()) .map(|idx| coalesce_batches.partition_statistics(Some(idx))) .collect::>>()?; @@ -525,8 +678,14 @@ mod test { let scan = create_scan_exec_with_statistics(None, Some(2)).await; let coalesce_partitions: Arc = Arc::new(CoalescePartitionsExec::new(scan)); - let expected_statistic_partition = - create_partition_statistics(4, 220, 1, 4, true); + // All files merged: ids [1-4], dates [2025-03-01, 2025-03-04] + let expected_statistic_partition = create_partition_statistics( + 4, + 32, + 1, + 4, + Some((DATE_2025_03_01, DATE_2025_03_04)), + ); let statistics = (0..coalesce_partitions.output_partitioning().partition_count()) 
.map(|idx| coalesce_partitions.partition_statistics(Some(idx))) .collect::>>()?; @@ -575,8 +734,14 @@ mod test { .map(|idx| global_limit.partition_statistics(Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 1); - let expected_statistic_partition = - create_partition_statistics(2, 110, 3, 4, true); + // GlobalLimit takes from first partition: ids [3,4], dates [2025-03-01, 2025-03-02] + let expected_statistic_partition = create_partition_statistics( + 2, + 16, + 3, + 4, + Some((DATE_2025_03_01, DATE_2025_03_02)), + ); assert_eq!(statistics[0], expected_statistic_partition); Ok(()) } @@ -601,11 +766,13 @@ mod test { ), ]); - let aggr_expr = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) - .schema(Arc::clone(&scan_schema)) - .alias(String::from("COUNT(c)")) - .build() - .map(Arc::new)?]; + let aggr_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) + .schema(Arc::clone(&scan_schema)) + .alias(String::from("COUNT(c)")) + .build() + .map(Arc::new)?, + ]; let aggregate_exec_partial: Arc = Arc::new(AggregateExec::try_new( @@ -620,14 +787,15 @@ mod test { let plan_string = get_plan_string(&aggregate_exec_partial).swap_remove(0); assert_snapshot!( plan_string, - @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" + @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)], ordering_mode=Sorted" ); let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; + // Aggregate doesn't propagate num_rows and ColumnStatistics byte_size from input let expected_p0_statistics = Statistics { num_rows: Precision::Inexact(2), - total_byte_size: Precision::Absent, + total_byte_size: Precision::Inexact(16), column_statistics: vec![ ColumnStatistics { null_count: Precision::Absent, @@ -635,6 +803,7 @@ mod test { min_value: Precision::Exact(ScalarValue::Int32(Some(3))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Absent, }, ColumnStatistics::new_unknown(), ColumnStatistics::new_unknown(), @@ -645,7 +814,7 @@ mod test { let expected_p1_statistics = Statistics { num_rows: Precision::Inexact(2), - total_byte_size: Precision::Absent, + total_byte_size: Precision::Inexact(16), column_statistics: vec![ ColumnStatistics { null_count: Precision::Absent, @@ -653,6 +822,7 @@ mod test { min_value: Precision::Exact(ScalarValue::Int32(Some(1))), sum_value: Precision::Absent, distinct_count: Precision::Absent, + byte_size: Precision::Absent, }, ColumnStatistics::new_unknown(), ColumnStatistics::new_unknown(), @@ -849,9 +1019,10 @@ mod test { .collect::>>()?; assert_eq!(statistics.len(), 3); + // Repartition preserves original total_rows from input (4 rows total) let expected_stats = Statistics { num_rows: Precision::Inexact(1), - total_byte_size: Precision::Inexact(73), + total_byte_size: Precision::Inexact(10), column_statistics: vec![ ColumnStatistics::new_unknown(), ColumnStatistics::new_unknown(), @@ -878,9 +1049,9 @@ mod test { partition_row_counts.push(total_rows); } assert_eq!(partition_row_counts.len(), 3); - assert_eq!(partition_row_counts[0], 2); + assert_eq!(partition_row_counts[0], 1); assert_eq!(partition_row_counts[1], 2); - assert_eq!(partition_row_counts[2], 0); + assert_eq!(partition_row_counts[2], 1); Ok(()) } @@ -898,9 +1069,11 @@ mod test { let result = repartition.partition_statistics(Some(2)); assert!(result.is_err()); let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("RepartitionExec invalid partition 2 (expected less than 2)")); 
+ assert!( + error + .to_string() + .contains("RepartitionExec invalid partition 2 (expected less than 2)") + ); let partitions = execute_stream_partitioned( repartition.clone(), @@ -953,9 +1126,10 @@ mod test { .collect::>>()?; assert_eq!(stats.len(), 2); + // Repartition preserves original total_rows from input (4 rows total) let expected_stats = Statistics { num_rows: Precision::Inexact(2), - total_byte_size: Precision::Inexact(110), + total_byte_size: Precision::Inexact(16), column_statistics: vec![ ColumnStatistics::new_unknown(), ColumnStatistics::new_unknown(), diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index 8631613c3925e..480f5c8cc97b1 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -24,8 +24,9 @@ use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::physical_plan::CsvSource; use datafusion::datasource::source::DataSourceExec; -use datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::{JoinSide, JoinType, NullEquality, Result, ScalarValue}; +use datafusion_datasource::TableSchema; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; @@ -34,30 +35,32 @@ use datafusion_expr::{ }; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_physical_expr::expressions::{ - binary, cast, col, BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr, + BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr, binary, cast, col, }; use datafusion_physical_expr::{Distribution, Partitioning, ScalarFunctionExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ OrderingRequirements, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; use datafusion_physical_optimizer::projection_pushdown::ProjectionPushdown; -use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::coop::CooperativeExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter}; use datafusion_physical_plan::joins::{ HashJoinExec, NestedLoopJoinExec, PartitionMode, StreamJoinPartitionMode, SymmetricHashJoinExec, }; -use datafusion_physical_plan::projection::{update_expr, ProjectionExec, ProjectionExpr}; +use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr, update_expr}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::{displayable, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, displayable}; use insta::assert_snapshot; use itertools::Itertools; 
@@ -229,9 +232,11 @@ fn test_update_matching_exprs() -> Result<()> { .map(|(expr, alias)| ProjectionExpr::new(expr.clone(), alias.clone())) .collect(); for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { - assert!(update_expr(&expr, &child_exprs, true)? - .unwrap() - .eq(&expected_expr)); + assert!( + update_expr(&expr, &child_exprs, true)? + .unwrap() + .eq(&expected_expr) + ); } Ok(()) @@ -368,9 +373,11 @@ fn test_update_projected_exprs() -> Result<()> { .map(|(expr, alias)| ProjectionExpr::new(expr.clone(), alias.clone())) .collect(); for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { - assert!(update_expr(&expr, &proj_exprs, false)? - .unwrap() - .eq(&expected_expr)); + assert!( + update_expr(&expr, &proj_exprs, false)? + .unwrap() + .eq(&expected_expr) + ); } Ok(()) @@ -384,14 +391,20 @@ fn create_simple_csv_exec() -> Arc { Field::new("d", DataType::Int32, true), Field::new("e", DataType::Int32, true), ])); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(CsvSource::new(false, 0, 0)), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection_indices(Some(vec![0, 1, 2, 3, 4])) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x", 100)) + .with_projection_indices(Some(vec![0, 1, 2, 3, 4])) + .unwrap() + .build(); DataSourceExec::from_data_source(config) } @@ -403,14 +416,20 @@ fn create_projecting_csv_exec() -> Arc { Field::new("c", DataType::Int32, true), Field::new("d", DataType::Int32, true), ])); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(CsvSource::new(false, 0, 0)), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection_indices(Some(vec![3, 2, 1])) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x", 100)) + .with_projection_indices(Some(vec![3, 2, 1])) + .unwrap() + .build(); DataSourceExec::from_data_source(config) } @@ -432,8 +451,8 @@ fn test_csv_after_projection() -> Result<()> { let csv = create_projecting_csv_exec(); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("b", 2)), "b".to_string()), - ProjectionExpr::new(Arc::new(Column::new("d", 0)), "d".to_string()), + ProjectionExpr::new(Arc::new(Column::new("b", 2)), "b"), + ProjectionExpr::new(Arc::new(Column::new("d", 0)), "d"), ], csv.clone(), )?); @@ -469,9 +488,9 @@ fn test_memory_after_projection() -> Result<()> { let memory = create_projecting_memory_exec(); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("d", 2)), "d".to_string()), - ProjectionExpr::new(Arc::new(Column::new("e", 3)), "e".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 1)), "a".to_string()), + ProjectionExpr::new(Arc::new(Column::new("d", 2)), "d"), + ProjectionExpr::new(Arc::new(Column::new("e", 3)), "e"), + ProjectionExpr::new(Arc::new(Column::new("a", 1)), 
"a"), ], memory.clone(), )?); @@ -575,9 +594,9 @@ fn test_streaming_table_after_projection() -> Result<()> { )?; let projection = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d".to_string()), - ProjectionExpr::new(Arc::new(Column::new("e", 2)), "e".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a".to_string()), + ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d"), + ProjectionExpr::new(Arc::new(Column::new("e", 2)), "e"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), ], Arc::new(streaming_table) as _, )?) as _; @@ -642,28 +661,25 @@ fn test_projection_after_projection() -> Result<()> { let csv = create_simple_csv_exec(); let child_projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("e", 4)), "new_e".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "new_b".to_string()), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("e", 4)), "new_e"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "new_b"), ], csv.clone(), )?); let top_projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("new_b", 3)), "new_b".to_string()), + ProjectionExpr::new(Arc::new(Column::new("new_b", 3)), "new_b"), ProjectionExpr::new( Arc::new(BinaryExpr::new( Arc::new(Column::new("c", 0)), Operator::Plus, Arc::new(Column::new("new_e", 1)), )), - "binary".to_string(), - ), - ProjectionExpr::new( - Arc::new(Column::new("new_b", 3)), - "newest_b".to_string(), + "binary", ), + ProjectionExpr::new(Arc::new(Column::new("new_b", 3)), "newest_b"), ], child_projection.clone(), )?); @@ -692,10 +708,7 @@ fn test_projection_after_projection() -> Result<()> { assert_snapshot!( actual, - @r" - ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - " + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b], file_type=csv, has_header=false" ); Ok(()) @@ -731,9 +744,9 @@ fn test_output_req_after_projection() -> Result<()> { )); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), ], sort_req.clone(), )?); @@ -762,8 +775,7 @@ fn test_output_req_after_projection() -> Result<()> { actual, @r" OutputRequirementExec: order_by=[(b@2, asc), (c@0 + new_a@1, asc)], dist_by=HashPartitioned[[new_a@1, b@2]]) - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -805,10 +817,11 @@ fn test_output_req_after_projection() -> Result<()> { .required_input_distribution()[0] .clone() { - assert!(vec - 
.iter() - .zip(expected_distribution) - .all(|(actual, expected)| actual.eq(&expected))); + assert!( + vec.iter() + .zip(expected_distribution) + .all(|(actual, expected)| actual.eq(&expected)) + ); } else { panic!("Expected HashPartitioned distribution!"); }; @@ -823,9 +836,9 @@ fn test_coalesce_partitions_after_projection() -> Result<()> { Arc::new(CoalescePartitionsExec::new(csv)); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_new".to_string()), - ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d".to_string()), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_new"), + ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d"), ], coalesce_partitions, )?); @@ -853,8 +866,7 @@ fn test_coalesce_partitions_after_projection() -> Result<()> { actual, @r" CoalescePartitionsExec - ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[b, a@0 as a_new, d], file_type=csv, has_header=false " ); @@ -880,9 +892,9 @@ fn test_filter_after_projection() -> Result<()> { let filter = Arc::new(FilterExec::try_new(predicate, csv)?); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_new".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), - ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d".to_string()), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_new"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), + ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d"), ], filter.clone(), )?) as _; @@ -911,8 +923,7 @@ fn test_filter_after_projection() -> Result<()> { actual, @r" FilterExec: b@1 - a_new@0 > d@2 - a_new@0 - ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a@0 as a_new, b, d], file_type=csv, has_header=false " ); @@ -975,17 +986,11 @@ fn test_join_after_projection() -> Result<()> { )?); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c_from_left".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b_from_left".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_from_left".to_string()), - ProjectionExpr::new( - Arc::new(Column::new("a", 5)), - "a_from_right".to_string(), - ), - ProjectionExpr::new( - Arc::new(Column::new("c", 7)), - "c_from_right".to_string(), - ), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c_from_left"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b_from_left"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_from_left"), + ProjectionExpr::new(Arc::new(Column::new("a", 5)), "a_from_right"), + ProjectionExpr::new(Arc::new(Column::new("c", 7)), "c_from_right"), ], join, )?) 
as _; @@ -1014,10 +1019,8 @@ fn test_join_after_projection() -> Result<()> { actual, @r" SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2 - ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - ProjectionExec: expr=[a@0 as a_from_right, c@2 as c_from_right] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a@0 as a_from_right, c@2 as c_from_right], file_type=csv, has_header=false " ); @@ -1106,16 +1109,16 @@ fn test_join_after_required_projection() -> Result<()> { )?); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("a", 5)), "a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 6)), "b".to_string()), - ProjectionExpr::new(Arc::new(Column::new("c", 7)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("d", 8)), "d".to_string()), - ProjectionExpr::new(Arc::new(Column::new("e", 9)), "e".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d".to_string()), - ProjectionExpr::new(Arc::new(Column::new("e", 4)), "e".to_string()), + ProjectionExpr::new(Arc::new(Column::new("a", 5)), "a"), + ProjectionExpr::new(Arc::new(Column::new("b", 6)), "b"), + ProjectionExpr::new(Arc::new(Column::new("c", 7)), "c"), + ProjectionExpr::new(Arc::new(Column::new("d", 8)), "d"), + ProjectionExpr::new(Arc::new(Column::new("e", 9)), "e"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d"), + ProjectionExpr::new(Arc::new(Column::new("e", 4)), "e"), ], join, )?) as _; @@ -1195,7 +1198,7 @@ fn test_nested_loop_join_after_projection() -> Result<()> { )?) as _; let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ProjectionExpr::new(col_left_c, "c".to_string())], + vec![ProjectionExpr::new(col_left_c, "c")], Arc::clone(&join), )?) as _; let initial = displayable(projection.as_ref()).indent(true).to_string(); @@ -1285,13 +1288,10 @@ fn test_hash_join_after_projection() -> Result<()> { )?); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c_from_left".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b_from_left".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_from_left".to_string()), - ProjectionExpr::new( - Arc::new(Column::new("c", 7)), - "c_from_right".to_string(), - ), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c_from_left"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b_from_left"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a_from_left"), + ProjectionExpr::new(Arc::new(Column::new("c", 7)), "c_from_right"), ], join.clone(), )?) 
as _; @@ -1327,10 +1327,10 @@ fn test_hash_join_after_projection() -> Result<()> { let projection = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("c", 7)), "c".to_string()), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("c", 7)), "c"), ], join.clone(), )?); @@ -1371,9 +1371,9 @@ fn test_repartition_after_projection() -> Result<()> { )?); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b_new".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d_new".to_string()), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b_new"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("d", 3)), "d_new"), ], repartition, )?) as _; @@ -1399,8 +1399,7 @@ fn test_repartition_after_projection() -> Result<()> { actual, @r" RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1 - ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[b@1 as b_new, a, d@3 as d_new], file_type=csv, has_header=false " ); @@ -1441,9 +1440,9 @@ fn test_sort_after_projection() -> Result<()> { ); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), ], Arc::new(sort_exec), )?) as _; @@ -1470,8 +1469,7 @@ fn test_sort_after_projection() -> Result<()> { actual, @r" SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -1495,9 +1493,9 @@ fn test_sort_preserving_after_projection() -> Result<()> { ); let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), ], Arc::new(sort_exec), )?) 
as _; @@ -1524,8 +1522,7 @@ fn test_sort_preserving_after_projection() -> Result<()> { actual, @r" SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC] - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -1538,9 +1535,9 @@ fn test_union_after_projection() -> Result<()> { let union = UnionExec::try_new(vec![csv.clone(), csv.clone(), csv])?; let projection: Arc = Arc::new(ProjectionExec::try_new( vec![ - ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c".to_string()), - ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a".to_string()), - ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b".to_string()), + ProjectionExpr::new(Arc::new(Column::new("c", 2)), "c"), + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "new_a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), ], union.clone(), )?) as _; @@ -1569,12 +1566,9 @@ fn test_union_after_projection() -> Result<()> { actual, @r" UnionExec - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -1589,14 +1583,23 @@ fn partitioned_data_source() -> Arc { Field::new("string_col", DataType::Utf8, true), ])); + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::new( + Arc::clone(&file_schema), + vec![Arc::new(Field::new("partition_col", DataType::Utf8, true))], + ); let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - file_schema.clone(), - Arc::new(CsvSource::default()), + Arc::new(CsvSource::new(table_schema).with_csv_options(options)), ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_table_partition_cols(vec![Field::new("partition_col", DataType::Utf8, true)]) + .with_file(PartitionedFile::new("x", 100)) .with_projection_indices(Some(vec![0, 1, 2])) + .unwrap() .build(); DataSourceExec::from_data_source(config) @@ -1611,16 +1614,13 @@ fn test_partition_col_projection_pushdown() -> Result<()> { vec![ ProjectionExpr::new( col("string_col", partitioned_schema.as_ref())?, - "string_col".to_string(), + "string_col", ), ProjectionExpr::new( col("partition_col", partitioned_schema.as_ref())?, - "partition_col".to_string(), - ), - ProjectionExpr::new( - col("int_col", partitioned_schema.as_ref())?, - "int_col".to_string(), + "partition_col", ), + ProjectionExpr::new(col("int_col", partitioned_schema.as_ref())?, "int_col"), ], source, )?); @@ -1634,10 +1634,7 @@ fn test_partition_col_projection_pushdown() -> Result<()> { let actual = after_optimize_string.trim(); 
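+    // The projection here only reorders columns (including the partition column), so
+    // ProjectionPushdown is expected to remove the ProjectionExec entirely and push the
+    // requested column order into the DataSourceExec, as the snapshot below verifies.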
assert_snapshot!( actual, - @r" - ProjectionExec: expr=[string_col@1 as string_col, partition_col@2 as partition_col, int_col@0 as int_col] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false - " + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[string_col, partition_col, int_col], file_type=csv, has_header=false" ); Ok(()) @@ -1652,7 +1649,7 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> { vec![ ProjectionExpr::new( col("string_col", partitioned_schema.as_ref())?, - "string_col".to_string(), + "string_col", ), ProjectionExpr::new( // CAST(partition_col, Utf8View) @@ -1661,12 +1658,9 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> { partitioned_schema.as_ref(), DataType::Utf8View, )?, - "partition_col".to_string(), - ), - ProjectionExpr::new( - col("int_col", partitioned_schema.as_ref())?, - "int_col".to_string(), + "partition_col", ), + ProjectionExpr::new(col("int_col", partitioned_schema.as_ref())?, "int_col"), ], source, )?); @@ -1678,11 +1672,107 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> { .indent(true) .to_string(); let actual = after_optimize_string.trim(); + assert_snapshot!( + actual, + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col], file_type=csv, has_header=false" + ); + + Ok(()) +} + +#[test] +fn test_coalesce_batches_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let filter = Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 2)), + Operator::Gt, + Arc::new(Literal::new(ScalarValue::Int32(Some(0)))), + )), + csv, + )?); + let coalesce_batches: Arc = + Arc::new(CoalesceBatchesExec::new(filter, 8192)); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), + ], + coalesce_batches, + )?); + + let initial = displayable(projection.as_ref()).indent(true).to_string(); + let actual = initial.trim(); + + assert_snapshot!( + actual, + @r" + ProjectionExec: expr=[a@0 as a, b@1 as b] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c@2 > 0 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + " + ); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let after_optimize_string = displayable(after_optimize.as_ref()) + .indent(true) + .to_string(); + let actual = after_optimize_string.trim(); + + // Projection should be pushed down through CoalesceBatchesExec + assert_snapshot!( + actual, + @r" + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c@2 > 0, projection=[a@0, b@1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + " + ); + + Ok(()) +} + +#[test] +fn test_cooperative_exec_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let cooperative: Arc = Arc::new(CooperativeExec::new(csv)); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + ProjectionExpr::new(Arc::new(Column::new("a", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("b", 1)), "b"), + ], + cooperative, + )?); + + let initial = displayable(projection.as_ref()).indent(true).to_string(); + let actual = initial.trim(); + + assert_snapshot!( + actual, + @r" + ProjectionExec: 
expr=[a@0 as a, b@1 as b] + CooperativeExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + " + ); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let after_optimize_string = displayable(after_optimize.as_ref()) + .indent(true) + .to_string(); + let actual = after_optimize_string.trim(); + + // Projection should be pushed down through CooperativeExec assert_snapshot!( actual, @r" - ProjectionExec: expr=[string_col@1 as string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col@0 as int_col] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false + CooperativeExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b], file_type=csv, has_header=false " ); diff --git a/datafusion/core/tests/physical_optimizer/pushdown_sort.rs b/datafusion/core/tests/physical_optimizer/pushdown_sort.rs new file mode 100644 index 0000000000000..caef0fba052cb --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/pushdown_sort.rs @@ -0,0 +1,1040 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for sort pushdown optimizer rule (Phase 1) +//! +//! Phase 1 tests verify that: +//! 1. Reverse scan is enabled (reverse_row_groups=true) +//! 2. SortExec is kept (because ordering is inexact) +//! 3. output_ordering remains unchanged +//! 4. Early termination is enabled for TopK queries +//! 5. 
Prefix matching works correctly + +use datafusion_physical_expr::expressions; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::pushdown_sort::PushdownSort; +use std::sync::Arc; + +use crate::physical_optimizer::test_utils::{ + OptimizationTest, coalesce_batches_exec, coalesce_partitions_exec, parquet_exec, + parquet_exec_with_sort, projection_exec, projection_exec_with_alias, + repartition_exec, schema, simple_projection_exec, sort_exec, sort_exec_with_fetch, + sort_expr, sort_expr_named, test_scan_with_ordering, +}; + +#[test] +fn test_sort_pushdown_disabled() { + // When pushdown is disabled, plan should remain unchanged + let schema = schema(); + let source = parquet_exec(schema.clone()); + let sort_exprs = LexOrdering::new(vec![sort_expr("a", &schema)]).unwrap(); + let plan = sort_exec(sort_exprs, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), false), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); +} + +#[test] +fn test_sort_pushdown_basic_phase1() { + // Phase 1: Reverse scan enabled, Sort kept, output_ordering unchanged + let schema = schema(); + + // Source has ASC NULLS LAST ordering (default) + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request DESC NULLS LAST ordering (exact reverse) + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_with_limit_phase1() { + // Phase 1: Sort with fetch enables early termination but keeps Sort + let schema = schema(); + + // Source has ASC ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request DESC ordering with limit + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec_with_fetch(desc_ordering, Some(10), source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: TopK(fetch=10), expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: TopK(fetch=10), expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 
group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_multiple_columns_phase1() { + // Phase 1: Sort on multiple columns - reverse multi-column ordering + let schema = schema(); + + // Source has [a DESC NULLS LAST, b ASC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone().reverse(), b.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request [a ASC NULLS FIRST, b DESC] ordering (exact reverse) + let reverse_ordering = + LexOrdering::new(vec![a.clone().asc().nulls_first(), b.reverse()]).unwrap(); + let plan = sort_exec(reverse_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC, b@1 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST, b@1 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC, b@1 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +// ============================================================================ +// PREFIX MATCHING TESTS +// ============================================================================ + +#[test] +fn test_prefix_match_single_column() { + // Test prefix matching: source has [a DESC, b ASC], query needs [a ASC] + // After reverse: [a ASC, b DESC] which satisfies [a ASC] prefix + let schema = schema(); + + // Source has [a DESC NULLS LAST, b ASC NULLS LAST] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone().reverse(), b]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request only [a ASC NULLS FIRST] - a prefix of the reversed ordering + let prefix_ordering = LexOrdering::new(vec![a.clone().asc().nulls_first()]).unwrap(); + let plan = sort_exec(prefix_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST, b@1 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_prefix_match_with_limit() { + // Test prefix matching with LIMIT - important for TopK optimization + let schema = schema(); + + // Source has [a ASC, b DESC, c ASC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let c = sort_expr("c", &schema); + let source_ordering = + LexOrdering::new(vec![a.clone(), b.clone().reverse(), c]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request [a DESC NULLS LAST, b ASC NULLS FIRST] with LIMIT 100 + // This is a prefix (2 columns) of the reversed 3-column ordering + let prefix_ordering = + LexOrdering::new(vec![a.reverse(), b.clone().asc().nulls_first()]).unwrap(); + let plan = sort_exec_with_fetch(prefix_ordering, 
Some(100), source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: TopK(fetch=100), expr=[a@0 DESC NULLS LAST, b@1 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 DESC NULLS LAST, c@2 ASC], file_type=parquet + output: + Ok: + - SortExec: TopK(fetch=100), expr=[a@0 DESC NULLS LAST, b@1 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_prefix_match_through_transparent_nodes() { + // Test prefix matching works through transparent nodes + let schema = schema(); + + // Source has [a DESC NULLS LAST, b ASC, c DESC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let c = sort_expr("c", &schema); + let source_ordering = + LexOrdering::new(vec![a.clone().reverse(), b, c.reverse()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + let coalesce = coalesce_batches_exec(source, 1024); + let repartition = repartition_exec(coalesce); + + // Request only [a ASC NULLS FIRST] - prefix of reversed ordering + let prefix_ordering = LexOrdering::new(vec![a.clone().asc().nulls_first()]).unwrap(); + let plan = sort_exec(prefix_ordering, repartition); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST, b@1 ASC, c@2 DESC NULLS LAST], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_no_prefix_match_wrong_direction() { + // Test that prefix matching does NOT work if the direction is wrong + let schema = schema(); + + // Source has [a DESC, b ASC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone().reverse(), b]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request [a DESC] - same direction as source, NOT a reverse prefix + let same_direction = LexOrdering::new(vec![a.clone().reverse()]).unwrap(); + let plan = sort_exec(same_direction, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST, b@1 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST, b@1 ASC], file_type=parquet + " + ); +} + +#[test] +fn test_no_prefix_match_longer_than_source() { + // Test that prefix 
matching does NOT work if requested is longer than source + let schema = schema(); + + // Source has [a DESC] ordering (single column) + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone().reverse()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request [a ASC, b DESC] - longer than source, can't be a prefix + let longer_ordering = + LexOrdering::new(vec![a.clone().asc().nulls_first(), b.reverse()]).unwrap(); + let plan = sort_exec(longer_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC, b@1 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC, b@1 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST], file_type=parquet + " + ); +} + +// ============================================================================ +// ORIGINAL TESTS +// ============================================================================ + +#[test] +fn test_sort_through_coalesce_batches() { + // Sort pushes through CoalesceBatchesExec + let schema = schema(); + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + let coalesce = coalesce_batches_exec(source, 1024); + + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, coalesce); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_through_repartition() { + // Sort should push through RepartitionExec + let schema = schema(); + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + let repartition = repartition_exec(source); + + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, repartition); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + - DataSourceExec: file_groups={1 group: 
[[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_nested_sorts() { + // Nested sort operations - only innermost can be optimized + let schema = schema(); + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let inner_sort = sort_exec(desc_ordering, source); + + let sort_exprs2 = LexOrdering::new(vec![b]).unwrap(); + let plan = sort_exec(sort_exprs2, inner_sort); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[b@1 ASC], preserve_partitioning=[false] + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[b@1 ASC], preserve_partitioning=[false] + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_non_sort_plans_unchanged() { + // Plans without SortExec should pass through unchanged + let schema = schema(); + let source = parquet_exec(schema.clone()); + let plan = coalesce_batches_exec(source, 1024); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + output: + Ok: + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); +} + +#[test] +fn test_optimizer_properties() { + // Test optimizer metadata + let optimizer = PushdownSort::new(); + + assert_eq!(optimizer.name(), "PushdownSort"); + assert!(optimizer.schema_check()); +} + +#[test] +fn test_sort_through_coalesce_partitions() { + // Sort should push through CoalescePartitionsExec + let schema = schema(); + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + let repartition = repartition_exec(source); + let coalesce_parts = coalesce_partitions_exec(repartition); + + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, coalesce_parts); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - CoalescePartitionsExec + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - CoalescePartitionsExec + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn 
test_complex_plan_with_multiple_operators() { + // Test a complex plan with multiple operators between sort and source + let schema = schema(); + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + let coalesce_batches = coalesce_batches_exec(source, 1024); + let repartition = repartition_exec(coalesce_batches); + let coalesce_parts = coalesce_partitions_exec(repartition); + + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, coalesce_parts); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - CoalescePartitionsExec + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - CoalescePartitionsExec + - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_multiple_sorts_different_columns() { + // Test nested sorts on different columns - only innermost can optimize + let schema = schema(); + let a = sort_expr("a", &schema); + let c = sort_expr("c", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // First sort by column 'a' DESC (reverse of source) + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let sort1 = sort_exec(desc_ordering, source); + + // Then sort by column 'c' (different column, can't optimize) + let sort_exprs2 = LexOrdering::new(vec![c]).unwrap(); + let plan = sort_exec(sort_exprs2, sort1); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_no_pushdown_for_unordered_source() { + // Verify pushdown does NOT happen for sources without ordering + let schema = schema(); + let source = parquet_exec(schema.clone()); // No output_ordering + let sort_exprs = LexOrdering::new(vec![sort_expr("a", &schema)]).unwrap(); + let plan = sort_exec(sort_exprs, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + 
- DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); +} + +#[test] +fn test_no_pushdown_for_non_reverse_sort() { + // Verify pushdown does NOT happen when sort doesn't reverse source ordering + let schema = schema(); + + // Source sorted by 'a' ASC + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Request sort by 'b' (different column) + let sort_exprs = LexOrdering::new(vec![b]).unwrap(); + let plan = sort_exec(sort_exprs, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[b@1 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[b@1 ASC], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + " + ); +} + +#[test] +fn test_pushdown_through_blocking_node() { + // Test that pushdown works for inner sort even when outer sort is blocked + // Structure: Sort -> Aggregate (blocks pushdown) -> Sort -> Scan + // The outer sort can't push through aggregate, but the inner sort should still optimize + use datafusion_functions_aggregate::count::count_udaf; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; + use datafusion_physical_plan::aggregates::{ + AggregateExec, AggregateMode, PhysicalGroupBy, + }; + use std::sync::Arc; + + let schema = schema(); + + // Bottom: DataSource with [a ASC NULLS LAST] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Inner Sort: [a DESC NULLS FIRST] - exact reverse, CAN push down to source + let inner_sort_ordering = LexOrdering::new(vec![a.clone().reverse()]).unwrap(); + let inner_sort = sort_exec(inner_sort_ordering, source); + + // Middle: Aggregate (blocks pushdown from outer sort) + // GROUP BY a, COUNT(b) + let group_by = PhysicalGroupBy::new_single(vec![( + Arc::new(expressions::Column::new("a", 0)) as _, + "a".to_string(), + )]); + + let count_expr = Arc::new( + AggregateExprBuilder::new( + count_udaf(), + vec![Arc::new(expressions::Column::new("b", 1)) as _], + ) + .schema(Arc::clone(&schema)) + .alias("COUNT(b)") + .build() + .unwrap(), + ); + + let aggregate = Arc::new( + AggregateExec::try_new( + AggregateMode::Final, + group_by, + vec![count_expr], + vec![None], + inner_sort, + Arc::clone(&schema), + ) + .unwrap(), + ); + + // Outer Sort: [a ASC] - this CANNOT push down through aggregate + let outer_sort_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let plan = sort_exec(outer_sort_ordering, aggregate); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - AggregateExec: mode=Final, gby=[a@0 as a], aggr=[COUNT(b)], ordering_mode=Sorted + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + - 
AggregateExec: mode=Final, gby=[a@0 as a], aggr=[COUNT(b)], ordering_mode=Sorted + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +// ============================================================================ +// PROJECTION TESTS +// ============================================================================ + +#[test] +fn test_sort_pushdown_through_simple_projection() { + // Sort pushes through projection with simple column references + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Projection: SELECT a, b (simple column references) + let projection = simple_projection_exec(source, vec![0, 1]); // columns a, b + + // Request [a DESC] - should push through projection to source + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_pushdown_through_projection_with_alias() { + // Sort pushes through projection with column aliases + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Projection: SELECT a AS id, b AS value + let projection = projection_exec_with_alias(source, vec![(0, "id"), (1, "value")]); + + // Request [id DESC] - should map to [a DESC] and push down + let id_expr = sort_expr_named("id", 0); + let desc_ordering = LexOrdering::new(vec![id_expr.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[id@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as id, b@1 as value] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[id@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as id, b@1 as value] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_no_sort_pushdown_through_computed_projection() { + use datafusion_expr::Operator; + + // Sort should NOT push through projection with computed columns + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), 
vec![source_ordering]); + + // Projection: SELECT a+b as sum, c + let projection = projection_exec( + vec![ + ( + Arc::new(expressions::BinaryExpr::new( + Arc::new(expressions::Column::new("a", 0)), + Operator::Plus, + Arc::new(expressions::Column::new("b", 1)), + )) as Arc, + "sum".to_string(), + ), + ( + Arc::new(expressions::Column::new("c", 2)) as Arc, + "c".to_string(), + ), + ], + source, + ) + .unwrap(); + + // Request [sum DESC] - should NOT push down (sum is computed) + let sum_expr = sort_expr_named("sum", 0); + let desc_ordering = LexOrdering::new(vec![sum_expr.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[sum@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 + b@1 as sum, c@2 as c] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[sum@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 + b@1 as sum, c@2 as c] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + " + ); +} + +#[test] +fn test_sort_pushdown_projection_reordered_columns() { + // Sort pushes through projection that reorders columns + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Projection: SELECT c, b, a (columns reordered) + let projection = simple_projection_exec(source, vec![2, 1, 0]); // c, b, a + + // Request [a DESC] where a is now at index 2 in projection output + let a_expr_at_2 = sort_expr_named("a", 2); + let desc_ordering = LexOrdering::new(vec![a_expr_at_2.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@2 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[c@2 as c, b@1 as b, a@0 as a] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@2 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[c@2 as c, b@1 as b, a@0 as a] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_pushdown_projection_with_limit() { + // Sort with LIMIT pushes through simple projection + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Projection: SELECT a, b + let projection = simple_projection_exec(source, vec![0, 1]); + + // Request [a DESC] with LIMIT 10 + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec_with_fetch(desc_ordering, Some(10), projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: TopK(fetch=10), expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: 
expr=[a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: TopK(fetch=10), expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a, b@1 as b] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_pushdown_through_projection_and_coalesce() { + // Sort pushes through both projection and coalesce batches + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + let coalesce = coalesce_batches_exec(source, 1024); + + // Projection: SELECT a, b + let projection = simple_projection_exec(coalesce, vec![0, 1]); + + // Request [a DESC] + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a, b@1 as b] + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a, b@1 as b] + - CoalesceBatchesExec: target_batch_size=1024 + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +#[test] +fn test_sort_pushdown_projection_subset_of_columns() { + // Sort pushes through projection that selects subset of columns + let schema = schema(); + + // Source has [a ASC, b ASC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone(), b.clone()]).unwrap(); + let source = parquet_exec_with_sort(schema.clone(), vec![source_ordering]); + + // Projection: SELECT a (subset of columns) + let projection = simple_projection_exec(source, vec![0]); + + // Request [a DESC] + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, projection); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=parquet + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - ProjectionExec: expr=[a@0 as a] + - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet, reverse_row_groups=true + " + ); +} + +// ============================================================================ +// TESTSCAN DEMONSTRATION TESTS +// ============================================================================ +// These tests use TestScan to demonstrate how sort pushdown works more clearly +// than ParquetExec. TestScan can accept ANY ordering (not just reverse) and +// displays the requested ordering explicitly in the output. 
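+// A minimal sketch of the shape shared by the tests below, reusing only helpers already
+// imported in this file (schema, sort_expr, test_scan_with_ordering, sort_exec,
+// OptimizationTest); each test varies only the source ordering and the ordering requested
+// by the SortExec:
+//
+//     let schema = schema();
+//     let a = sort_expr("a", &schema);
+//     let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap();
+//     let source = test_scan_with_ordering(schema.clone(), source_ordering);
+//     let plan = sort_exec(LexOrdering::new(vec![a.reverse()]).unwrap(), source);
+//     insta::assert_snapshot!(OptimizationTest::new(plan, PushdownSort::new(), true), @"...");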
+ +#[test] +fn test_sort_pushdown_with_test_scan_basic() { + // Demonstrates TestScan showing requested ordering clearly + let schema = schema(); + + // Source has [a ASC] ordering + let a = sort_expr("a", &schema); + let source_ordering = LexOrdering::new(vec![a.clone()]).unwrap(); + let source = test_scan_with_ordering(schema.clone(), source_ordering); + + // Request [a DESC] ordering + let desc_ordering = LexOrdering::new(vec![a.reverse()]).unwrap(); + let plan = sort_exec(desc_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - TestScan: output_ordering=[a@0 ASC] + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false] + - TestScan: output_ordering=[a@0 ASC], requested_ordering=[a@0 DESC NULLS LAST] + " + ); +} + +#[test] +fn test_sort_pushdown_with_test_scan_multi_column() { + // Demonstrates TestScan with multi-column ordering + let schema = schema(); + + // Source has [a ASC, b DESC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone(), b.clone().reverse()]).unwrap(); + let source = test_scan_with_ordering(schema.clone(), source_ordering); + + // Request [a DESC, b ASC] ordering (reverse of source) + let reverse_ordering = LexOrdering::new(vec![a.reverse(), b]).unwrap(); + let plan = sort_exec(reverse_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 DESC NULLS LAST, b@1 ASC], preserve_partitioning=[false] + - TestScan: output_ordering=[a@0 ASC, b@1 DESC NULLS LAST] + output: + Ok: + - SortExec: expr=[a@0 DESC NULLS LAST, b@1 ASC], preserve_partitioning=[false] + - TestScan: output_ordering=[a@0 ASC, b@1 DESC NULLS LAST], requested_ordering=[a@0 DESC NULLS LAST, b@1 ASC] + " + ); +} + +#[test] +fn test_sort_pushdown_with_test_scan_arbitrary_ordering() { + // Demonstrates that TestScan can accept ANY ordering (not just reverse) + // This is different from ParquetExec which only supports reverse scans + let schema = schema(); + + // Source has [a ASC, b ASC] ordering + let a = sort_expr("a", &schema); + let b = sort_expr("b", &schema); + let source_ordering = LexOrdering::new(vec![a.clone(), b.clone()]).unwrap(); + let source = test_scan_with_ordering(schema.clone(), source_ordering); + + // Request [a ASC, b DESC] - NOT a simple reverse, but TestScan accepts it + let mixed_ordering = LexOrdering::new(vec![a, b.reverse()]).unwrap(); + let plan = sort_exec(mixed_ordering, source); + + insta::assert_snapshot!( + OptimizationTest::new(plan, PushdownSort::new(), true), + @r" + OptimizationTest: + input: + - SortExec: expr=[a@0 ASC, b@1 DESC NULLS LAST], preserve_partitioning=[false] + - TestScan: output_ordering=[a@0 ASC, b@1 ASC] + output: + Ok: + - SortExec: expr=[a@0 ASC, b@1 DESC NULLS LAST], preserve_partitioning=[false] + - TestScan: output_ordering=[a@0 ASC, b@1 ASC], requested_ordering=[a@0 ASC, b@1 DESC NULLS LAST] + " + ); +} diff --git a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs index 066e52614a12e..d93081f5ceb80 100644 --- a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs +++ 
b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs @@ -50,8 +50,8 @@ use datafusion_physical_plan::{ collect, displayable, ExecutionPlan, Partitioning, }; -use object_store::memory::InMemory; use object_store::ObjectStore; +use object_store::memory::InMemory; use rstest::rstest; use url::Url; @@ -138,7 +138,8 @@ impl ReplaceTest { assert!( res.is_ok(), "Some errors occurred while executing the optimized physical plan: {:?}\nPlan: {}", - res.unwrap_err(), optimized_plan_string + res.unwrap_err(), + optimized_plan_string ); } @@ -192,7 +193,7 @@ async fn test_replace_multiple_input_repartition_1( SortPreservingMergeExec: [a@0 ASC NULLS LAST] SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ -202,13 +203,13 @@ async fn test_replace_multiple_input_repartition_1( SortPreservingMergeExec: [a@0 ASC NULLS LAST] SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: SortPreservingMergeExec: [a@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -218,13 +219,13 @@ async fn test_replace_multiple_input_repartition_1( SortPreservingMergeExec: [a@0 ASC NULLS LAST] SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: SortPreservingMergeExec: [a@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -275,21 +276,21 @@ async fn test_with_inter_children_change_only( SortExec: expr=[a@0 ASC], preserve_partitioning=[true] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - 
RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC] Optimized: SortPreservingMergeExec: [a@0 ASC] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortPreservingMergeExec: [a@0 ASC] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC] "); }, @@ -300,11 +301,11 @@ async fn test_with_inter_children_change_only( SortExec: expr=[a@0 ASC], preserve_partitioning=[true] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC "); }, @@ -315,21 +316,21 @@ async fn test_with_inter_children_change_only( SortExec: expr=[a@0 ASC], preserve_partitioning=[true] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC Optimized: SortPreservingMergeExec: [a@0 ASC] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortPreservingMergeExec: [a@0 ASC] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC "); } @@ -375,14 +376,14 @@ async fn test_replace_multiple_input_repartition_2( SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 FilterExec: c@1 > 3 - 
RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: SortPreservingMergeExec: [a@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST FilterExec: c@1 > 3 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -393,7 +394,7 @@ async fn test_replace_multiple_input_repartition_2( SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 FilterExec: c@1 > 3 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ -404,14 +405,14 @@ async fn test_replace_multiple_input_repartition_2( SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 FilterExec: c@1 > 3 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: SortPreservingMergeExec: [a@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST FilterExec: c@1 > 3 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -460,7 +461,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: @@ -468,7 +469,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -480,7 +481,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), 
input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ -492,7 +493,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: @@ -500,7 +501,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -551,7 +552,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps_2( FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: @@ -560,7 +561,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps_2( FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -573,7 +574,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps_2( FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ -586,7 +587,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps_2( FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: @@ -595,7 +596,7 @@ async fn test_replace_multiple_input_repartition_with_extra_steps_2( FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST CoalesceBatchesExec: target_batch_size=8192 - 
RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -639,7 +640,7 @@ async fn test_not_replacing_when_no_need_to_preserve_sorting( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -650,7 +651,7 @@ async fn test_not_replacing_when_no_need_to_preserve_sorting( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); // Expected bounded results same with and without flag, because there is no executor with ordering requirement @@ -662,7 +663,7 @@ async fn test_not_replacing_when_no_need_to_preserve_sorting( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -712,7 +713,7 @@ async fn test_with_multiple_replaceable_repartitions( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: @@ -721,7 +722,7 @@ async fn test_with_multiple_replaceable_repartitions( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -734,7 +735,7 @@ async fn test_with_multiple_replaceable_repartitions( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ -747,7 +748,7 @@ async fn test_with_multiple_replaceable_repartitions( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - 
RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: @@ -756,7 +757,7 @@ async fn test_with_multiple_replaceable_repartitions( CoalesceBatchesExec: target_batch_size=8192 FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -804,7 +805,7 @@ async fn test_not_replace_with_different_orderings( SortPreservingMergeExec: [c@1 ASC] SortExec: expr=[c@1 ASC], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -814,7 +815,7 @@ async fn test_not_replace_with_different_orderings( SortPreservingMergeExec: [c@1 ASC] SortExec: expr=[c@1 ASC], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); // Expected bounded results same with and without flag, because ordering requirement of the executor is @@ -826,7 +827,7 @@ async fn test_not_replace_with_different_orderings( SortPreservingMergeExec: [c@1 ASC] SortExec: expr=[c@1 ASC], preserve_partitioning=[true] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -870,13 +871,13 @@ async fn test_with_lost_ordering( SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: SortPreservingMergeExec: [a@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -886,7 +887,7 @@ async fn test_with_lost_ordering( SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), 
input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ -896,13 +897,13 @@ async fn test_with_lost_ordering( SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: SortPreservingMergeExec: [a@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -956,22 +957,22 @@ async fn test_with_lost_and_kept_ordering( SortExec: expr=[c@1 ASC], preserve_partitioning=[true] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[c@1 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] Optimized: SortPreservingMergeExec: [c@1 ASC] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=c@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[c@1 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -982,11 +983,11 @@ async fn test_with_lost_and_kept_ordering( SortExec: expr=[c@1 ASC], preserve_partitioning=[true] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[c@1 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); }, @@ 
-997,22 +998,22 @@ async fn test_with_lost_and_kept_ordering( SortExec: expr=[c@1 ASC], preserve_partitioning=[true] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[c@1 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST Optimized: SortPreservingMergeExec: [c@1 ASC] FilterExec: c@1 > 3 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=c@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true SortExec: expr=[c@1 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); } @@ -1077,11 +1078,11 @@ async fn test_with_multiple_child_trees( HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)] CoalesceBatchesExec: target_batch_size=4096 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=4096 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST] "); }, @@ -1093,11 +1094,11 @@ async fn test_with_multiple_child_trees( HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)] CoalesceBatchesExec: target_batch_size=4096 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST CoalesceBatchesExec: target_batch_size=4096 RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true DataSourceExec: partitions=1, partition_sizes=[1], output_ordering=a@0 ASC NULLS LAST "); // Expected bounded results same with and without flag, because ordering get lost during intermediate executor anyway. 
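The recurring change in the snapshots above is that RepartitionExec now reports maintains_sort_order=true in its one-line plan display whenever its single-partition input ordering is preserved. As a rough illustration of how an operator's display line can expose such a flag, here is a minimal, self-contained Rust sketch; the struct and field names are hypothetical and do not come from DataFusion's actual RepartitionExec implementation.

use std::fmt;

// Hypothetical stand-in for an execution plan node's display state; this is
// not the real DataFusion RepartitionExec, only an illustration of the
// display format the updated snapshots expect.
struct RepartitionDisplay {
    partitioning: String,
    input_partitions: usize,
    maintains_sort_order: bool,
}

impl fmt::Display for RepartitionDisplay {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "RepartitionExec: partitioning={}, input_partitions={}",
            self.partitioning, self.input_partitions
        )?;
        // Append the flag only when it is set, mirroring how the updated
        // snapshots show it on order-preserving single-partition inputs.
        if self.maintains_sort_order {
            write!(f, ", maintains_sort_order=true")?;
        }
        Ok(())
    }
}

fn main() {
    let node = RepartitionDisplay {
        partitioning: "RoundRobinBatch(8)".to_string(),
        input_partitions: 1,
        maintains_sort_order: true,
    };
    assert_eq!(
        node.to_string(),
        "RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1, maintains_sort_order=true"
    );
}

In this sketch the flag is printed conditionally, so display strings for nodes that do not maintain their input ordering stay unchanged, which matches the snapshots above where only the single-partition repartitions gained the new attribute.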
@@ -1248,7 +1249,10 @@ fn test_plan_with_order_preserving_variants_preserves_fetch() -> Result<()> { )], ); let res = plan_with_order_preserving_variants(requirements, false, true, Some(15)); - assert_contains!(res.unwrap_err().to_string(), "CoalescePartitionsExec fetch [10] should be greater than or equal to SortExec fetch [15]"); + assert_contains!( + res.unwrap_err().to_string(), + "CoalescePartitionsExec fetch [10] should be greater than or equal to SortExec fetch [15]" + ); // Test sort is without fetch, expected to get the fetch value from the coalesced let requirements = OrderPreservationContext::new( diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs index 9867ed1733413..217570846d56e 100644 --- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs @@ -30,13 +30,13 @@ use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTab use datafusion::prelude::{CsvReadOptions, SessionContext}; use datafusion_common::config::ConfigOptions; use datafusion_common::{JoinType, Result, ScalarValue}; -use datafusion_physical_expr::expressions::{col, Literal}; use datafusion_physical_expr::Partitioning; +use datafusion_physical_expr::expressions::{Literal, col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::{displayable, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, displayable}; use async_trait::async_trait; @@ -555,11 +555,11 @@ async fn test_sort_merge_join_satisfied() -> Result<()> { assert_snapshot!( actual, @r" - SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)] - RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1 + SortMergeJoinExec: join_type=Inner, on=[(c9@0, a@0)] + RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1, maintains_sort_order=true SortExec: expr=[c9@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true SortExec: expr=[a@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] " @@ -605,8 +605,8 @@ async fn test_sort_merge_join_order_missing() -> Result<()> { assert_snapshot!( actual, @r" - SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)] - RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1 + SortMergeJoinExec: join_type=Inner, on=[(c9@0, a@0)] + RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1, maintains_sort_order=true SortExec: expr=[c9@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 @@ -653,11 +653,11 @@ async fn test_sort_merge_join_dist_missing() -> Result<()> { assert_snapshot!( actual, @r" - SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)] - RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1 + SortMergeJoinExec: join_type=Inner, on=[(c9@0, a@0)] + RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1, maintains_sort_order=true SortExec: expr=[c9@0 ASC], 
preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1, maintains_sort_order=true SortExec: expr=[a@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] " diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs index 8ca33f3d4abb9..5b50181d7fd3e 100644 --- a/datafusion/core/tests/physical_optimizer/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -18,7 +18,7 @@ //! Test utilities for physical optimizer tests use std::any::Any; -use std::fmt::Formatter; +use std::fmt::{Display, Formatter}; use std::sync::{Arc, LazyLock}; use arrow::array::Int32Array; @@ -33,25 +33,29 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::stats::Precision; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; -use datafusion_common::{ColumnStatistics, JoinType, NullEquality, Result, Statistics}; +use datafusion_common::{ + ColumnStatistics, JoinType, NullEquality, Result, Statistics, internal_err, +}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{WindowFrame, WindowFunctionDefinition}; use datafusion_functions_aggregate::count::count_udaf; +use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::{self, col}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, OrderingRequirements, PhysicalSortExpr, }; -use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{JoinFilter, JoinOn}; use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode, SortMergeJoinExec}; @@ -63,18 +67,17 @@ use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeE use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::windows::{create_window_expr, BoundedWindowAggExec}; +use datafusion_physical_plan::windows::{BoundedWindowAggExec, create_window_expr}; use datafusion_physical_plan::{ - displayable, DisplayAs, DisplayFormatType, ExecutionPlan, InputOrderMode, - Partitioning, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, InputOrderMode, Partitioning, + PlanProperties, SortOrderPushdownResult, displayable, }; /// Create a non sorted parquet exec pub fn parquet_exec(schema: SchemaRef) -> Arc { let 
config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) .build(); @@ -89,8 +92,7 @@ pub(crate) fn parquet_exec_with_sort( ) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_output_ordering(output_ordering) @@ -106,6 +108,7 @@ fn int64_stats() -> ColumnStatistics { max_value: Precision::Exact(1_000_000.into()), min_value: Precision::Exact(0.into()), distinct_count: Precision::Absent, + byte_size: Precision::Absent, } } @@ -127,17 +130,13 @@ pub(crate) fn parquet_exec_with_stats(file_size: u64) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(ParquetSource::new(Default::default())), + Arc::new(ParquetSource::new(schema())), ) .with_file(PartitionedFile::new("x".to_string(), file_size)) .with_statistics(statistics) .build(); - assert_eq!( - config.file_source.statistics().unwrap().num_rows, - Precision::Inexact(10000) - ); + assert_eq!(config.statistics().num_rows, Precision::Inexact(10000)); DataSourceExec::from_data_source(config) } @@ -467,10 +466,11 @@ impl ExecutionPlan for RequirementsTestExec { } fn required_input_ordering(&self) -> Vec> { - vec![self - .required_input_ordering - .as_ref() - .map(|ordering| OrderingRequirements::from(ordering.clone()))] + vec![ + self.required_input_ordering + .as_ref() + .map(|ordering| OrderingRequirements::from(ordering.clone())), + ] } fn maintains_input_order(&self) -> Vec { @@ -704,3 +704,278 @@ impl TestAggregate { } } } + +/// A harness for testing physical optimizers. 
+#[derive(Debug)] +pub struct OptimizationTest { + input: Vec, + output: Result, String>, +} + +impl OptimizationTest { + pub fn new( + input_plan: Arc, + opt: O, + enable_sort_pushdown: bool, + ) -> Self + where + O: PhysicalOptimizerRule, + { + let input = format_execution_plan(&input_plan); + let input_schema = input_plan.schema(); + + let mut config = ConfigOptions::new(); + config.optimizer.enable_sort_pushdown = enable_sort_pushdown; + let output_result = opt.optimize(input_plan, &config); + let output = output_result + .and_then(|plan| { + if opt.schema_check() && (plan.schema() != input_schema) { + internal_err!( + "Schema mismatch:\n\nBefore:\n{:?}\n\nAfter:\n{:?}", + input_schema, + plan.schema() + ) + } else { + Ok(plan) + } + }) + .map(|plan| format_execution_plan(&plan)) + .map_err(|e| e.to_string()); + + Self { input, output } + } +} + +impl Display for OptimizationTest { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f, "OptimizationTest:")?; + writeln!(f, " input:")?; + for line in &self.input { + writeln!(f, " - {line}")?; + } + writeln!(f, " output:")?; + match &self.output { + Ok(output) => { + writeln!(f, " Ok:")?; + for line in output { + writeln!(f, " - {line}")?; + } + } + Err(err) => { + writeln!(f, " Err: {err}")?; + } + } + Ok(()) + } +} + +pub fn format_execution_plan(plan: &Arc) -> Vec { + format_lines(&displayable(plan.as_ref()).indent(false).to_string()) +} + +fn format_lines(s: &str) -> Vec { + s.trim().split('\n').map(|s| s.to_string()).collect() +} + +/// Create a simple ProjectionExec with column indices (simplified version) +pub fn simple_projection_exec( + input: Arc, + columns: Vec, +) -> Arc { + let schema = input.schema(); + let exprs: Vec<(Arc, String)> = columns + .iter() + .map(|&i| { + let field = schema.field(i); + ( + Arc::new(expressions::Column::new(field.name(), i)) + as Arc, + field.name().to_string(), + ) + }) + .collect(); + + projection_exec(exprs, input).unwrap() +} + +/// Create a ProjectionExec with column aliases +pub fn projection_exec_with_alias( + input: Arc, + columns: Vec<(usize, &str)>, +) -> Arc { + let schema = input.schema(); + let exprs: Vec<(Arc, String)> = columns + .iter() + .map(|&(i, alias)| { + ( + Arc::new(expressions::Column::new(schema.field(i).name(), i)) + as Arc, + alias.to_string(), + ) + }) + .collect(); + + projection_exec(exprs, input).unwrap() +} + +/// Create a sort expression with custom name and index +pub fn sort_expr_named(name: &str, index: usize) -> PhysicalSortExpr { + PhysicalSortExpr { + expr: Arc::new(expressions::Column::new(name, index)), + options: SortOptions::default(), + } +} + +/// A test data source that can display any requested ordering +/// This is useful for testing sort pushdown behavior +#[derive(Debug, Clone)] +pub struct TestScan { + schema: SchemaRef, + output_ordering: Vec, + plan_properties: PlanProperties, + // Store the requested ordering for display + requested_ordering: Option, +} + +impl TestScan { + /// Create a new TestScan with the given schema and output ordering + pub fn new(schema: SchemaRef, output_ordering: Vec) -> Self { + let eq_properties = if !output_ordering.is_empty() { + // Convert Vec to the format expected by new_with_orderings + // We need to extract the inner Vec from each LexOrdering + let orderings: Vec> = output_ordering + .iter() + .map(|lex_ordering| { + // LexOrdering implements IntoIterator, so we can collect it + lex_ordering.iter().cloned().collect() + }) + .collect(); + + 
EquivalenceProperties::new_with_orderings(Arc::clone(&schema), orderings) + } else { + EquivalenceProperties::new(Arc::clone(&schema)) + }; + + let plan_properties = PlanProperties::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + EmissionType::Incremental, + Boundedness::Bounded, + ); + + Self { + schema, + output_ordering, + plan_properties, + requested_ordering: None, + } + } + + /// Create a TestScan with a single output ordering + pub fn with_ordering(schema: SchemaRef, ordering: LexOrdering) -> Self { + Self::new(schema, vec![ordering]) + } +} + +impl DisplayAs for TestScan { + fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "TestScan")?; + if !self.output_ordering.is_empty() { + write!(f, ": output_ordering=[")?; + // Format the ordering in a readable way + for (i, sort_expr) in self.output_ordering[0].iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{sort_expr}")?; + } + write!(f, "]")?; + } + // This is the key part - show what ordering was requested + if let Some(ref req) = self.requested_ordering { + write!(f, ", requested_ordering=[")?; + for (i, sort_expr) in req.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{sort_expr}")?; + } + write!(f, "]")?; + } + Ok(()) + } + DisplayFormatType::TreeRender => { + write!(f, "TestScan") + } + } + } +} + +impl ExecutionPlan for TestScan { + fn name(&self) -> &str { + "TestScan" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.plan_properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if children.is_empty() { + Ok(self) + } else { + internal_err!("TestScan should have no children") + } + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + internal_err!("TestScan is for testing optimizer only, not for execution") + } + + fn partition_statistics(&self, _partition: Option) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + // This is the key method - implement sort pushdown + fn try_pushdown_sort( + &self, + order: &[PhysicalSortExpr], + ) -> Result>> { + // For testing purposes, accept ANY ordering request + // and create a new TestScan that shows what was requested + let requested_ordering = LexOrdering::new(order.to_vec()); + + let mut new_scan = self.clone(); + new_scan.requested_ordering = requested_ordering; + + // Always return Inexact to keep the Sort node (like Phase 1 behavior) + Ok(SortOrderPushdownResult::Inexact { + inner: Arc::new(new_scan), + }) + } +} + +/// Helper function to create a TestScan with ordering +pub fn test_scan_with_ordering( + schema: SchemaRef, + ordering: LexOrdering, +) -> Arc { + Arc::new(TestScan::with_ordering(schema, ordering)) +} diff --git a/datafusion/core/tests/physical_optimizer/window_optimize.rs b/datafusion/core/tests/physical_optimizer/window_optimize.rs index fc1e6444d756e..796f6b6259716 100644 --- a/datafusion/core/tests/physical_optimizer/window_optimize.rs +++ b/datafusion/core/tests/physical_optimizer/window_optimize.rs @@ -26,10 +26,10 @@ mod test { use datafusion_expr::WindowFrame; use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::aggregate::AggregateExprBuilder; - use datafusion_physical_expr::expressions::{col, Column}; + use datafusion_physical_expr::expressions::{Column, col}; use 
datafusion_physical_expr::window::PlainAggregateWindowExpr; use datafusion_physical_plan::windows::BoundedWindowAggExec; - use datafusion_physical_plan::{common, ExecutionPlan, InputOrderMode}; + use datafusion_physical_plan::{ExecutionPlan, InputOrderMode, common}; use std::sync::Arc; /// Test case for diff --git a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs deleted file mode 100644 index c3c92a9028d67..0000000000000 --- a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs +++ /dev/null @@ -1,363 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use arrow::array::RecordBatch; -use arrow_schema::{DataType, Field, Schema, SchemaRef}; -use bytes::{BufMut, BytesMut}; -use datafusion::common::Result; -use datafusion::datasource::listing::PartitionedFile; -use datafusion::datasource::physical_plan::{ - ArrowSource, CsvSource, FileSource, JsonSource, ParquetSource, -}; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::prelude::SessionContext; -use datafusion_common::ColumnStatistics; -use datafusion_datasource::file_scan_config::FileScanConfigBuilder; -use datafusion_datasource::schema_adapter::{ - SchemaAdapter, SchemaAdapterFactory, SchemaMapper, -}; -use datafusion_datasource::source::DataSourceExec; -use datafusion_execution::object_store::ObjectStoreUrl; -use object_store::{memory::InMemory, path::Path, ObjectStore}; -use parquet::arrow::ArrowWriter; - -async fn write_parquet(batch: RecordBatch, store: Arc, path: &str) { - let mut out = BytesMut::new().writer(); - { - let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None).unwrap(); - writer.write(&batch).unwrap(); - writer.finish().unwrap(); - } - let data = out.into_inner().freeze(); - store.put(&Path::from(path), data.into()).await.unwrap(); -} - -/// A schema adapter factory that transforms column names to uppercase -#[derive(Debug, PartialEq)] -struct UppercaseAdapterFactory {} - -impl SchemaAdapterFactory for UppercaseAdapterFactory { - fn create( - &self, - projected_table_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box { - Box::new(UppercaseAdapter { - table_schema: projected_table_schema, - }) - } -} - -/// Schema adapter that transforms column names to uppercase -#[derive(Debug)] -struct UppercaseAdapter { - table_schema: SchemaRef, -} - -impl SchemaAdapter for UppercaseAdapter { - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { - let field = self.table_schema.field(index); - let uppercase_name = field.name().to_uppercase(); - file_schema - .fields() - .iter() - .position(|f| f.name().to_uppercase() == uppercase_name) - } - - fn map_schema( - &self, - file_schema: 
&Schema, - ) -> Result<(Arc, Vec)> { - let mut projection = Vec::new(); - - // Map each field in the table schema to the corresponding field in the file schema - for table_field in self.table_schema.fields() { - let uppercase_name = table_field.name().to_uppercase(); - if let Some(pos) = file_schema - .fields() - .iter() - .position(|f| f.name().to_uppercase() == uppercase_name) - { - projection.push(pos); - } - } - - let mapper = UppercaseSchemaMapper { - output_schema: self.output_schema(), - projection: projection.clone(), - }; - - Ok((Arc::new(mapper), projection)) - } -} - -impl UppercaseAdapter { - fn output_schema(&self) -> SchemaRef { - let fields: Vec = self - .table_schema - .fields() - .iter() - .map(|f| { - Field::new( - f.name().to_uppercase().as_str(), - f.data_type().clone(), - f.is_nullable(), - ) - }) - .collect(); - - Arc::new(Schema::new(fields)) - } -} - -#[derive(Debug)] -struct UppercaseSchemaMapper { - output_schema: SchemaRef, - projection: Vec, -} - -impl SchemaMapper for UppercaseSchemaMapper { - fn map_batch(&self, batch: RecordBatch) -> Result { - let columns = self - .projection - .iter() - .map(|&i| batch.column(i).clone()) - .collect::>(); - Ok(RecordBatch::try_new(self.output_schema.clone(), columns)?) - } - - fn map_column_statistics( - &self, - stats: &[ColumnStatistics], - ) -> Result> { - Ok(self - .projection - .iter() - .map(|&i| stats.get(i).cloned().unwrap_or_default()) - .collect()) - } -} - -#[cfg(feature = "parquet")] -#[tokio::test] -async fn test_parquet_integration_with_schema_adapter() -> Result<()> { - // Create test data - let batch = RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])), - vec![ - Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), - Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), - ], - )?; - - let store = Arc::new(InMemory::new()) as Arc; - let store_url = ObjectStoreUrl::parse("memory://").unwrap(); - let path = "test.parquet"; - write_parquet(batch.clone(), store.clone(), path).await; - - // Get the actual file size from the object store - let object_meta = store.head(&Path::from(path)).await?; - let file_size = object_meta.size; - - // Create a session context and register the object store - let ctx = SessionContext::new(); - ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); - - // Create a ParquetSource with the adapter factory - let file_source = ParquetSource::default() - .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - - // Create a table schema with uppercase column names - let table_schema = Arc::new(Schema::new(vec![ - Field::new("ID", DataType::Int32, false), - Field::new("NAME", DataType::Utf8, true), - ])); - - let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source) - .with_file(PartitionedFile::new(path, file_size)) - .build(); - - // Create a data source executor - let exec = DataSourceExec::from_data_source(config); - - // Collect results - let task_ctx = ctx.task_ctx(); - let stream = exec.execute(0, task_ctx)?; - let batches = datafusion::physical_plan::common::collect(stream).await?; - - // There should be one batch - assert_eq!(batches.len(), 1); - - // Verify the schema has the uppercase column names - let result_schema = batches[0].schema(); - assert_eq!(result_schema.field(0).name(), "ID"); - assert_eq!(result_schema.field(1).name(), "NAME"); - - Ok(()) -} - -#[cfg(feature = "parquet")] -#[tokio::test] -async fn 
test_parquet_integration_with_schema_adapter_and_expression_rewriter( -) -> Result<()> { - // Create test data - let batch = RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, true), - ])), - vec![ - Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), - Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), - ], - )?; - - let store = Arc::new(InMemory::new()) as Arc; - let store_url = ObjectStoreUrl::parse("memory://").unwrap(); - let path = "test.parquet"; - write_parquet(batch.clone(), store.clone(), path).await; - - // Get the actual file size from the object store - let object_meta = store.head(&Path::from(path)).await?; - let file_size = object_meta.size; - - // Create a session context and register the object store - let ctx = SessionContext::new(); - ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); - - // Create a ParquetSource with the adapter factory - let file_source = ParquetSource::default() - .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - - let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source) - .with_file(PartitionedFile::new(path, file_size)) - .build(); - - // Create a data source executor - let exec = DataSourceExec::from_data_source(config); - - // Collect results - let task_ctx = ctx.task_ctx(); - let stream = exec.execute(0, task_ctx)?; - let batches = datafusion::physical_plan::common::collect(stream).await?; - - // There should be one batch - assert_eq!(batches.len(), 1); - - // Verify the schema has the original column names (schema adapter not applied in DataSourceExec) - let result_schema = batches[0].schema(); - assert_eq!(result_schema.field(0).name(), "id"); - assert_eq!(result_schema.field(1).name(), "name"); - - Ok(()) -} - -#[tokio::test] -async fn test_multi_source_schema_adapter_reuse() -> Result<()> { - // This test verifies that the same schema adapter factory can be reused - // across different file source types. This is important for ensuring that: - // 1. The schema adapter factory interface works uniformly across all source types - // 2. The factory can be shared and cloned efficiently using Arc - // 3. 
Various data source implementations correctly implement the schema adapter factory pattern - - // Create a test factory - let factory = Arc::new(UppercaseAdapterFactory {}); - - // Test ArrowSource - { - let source = ArrowSource::default(); - let source_with_adapter = source - .clone() - .with_schema_adapter_factory(factory.clone()) - .unwrap(); - - let base_source: Arc = source.into(); - assert!(base_source.schema_adapter_factory().is_none()); - assert!(source_with_adapter.schema_adapter_factory().is_some()); - - let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - format!("{:?}", retrieved_factory.as_ref()), - format!("{:?}", factory.as_ref()) - ); - } - - // Test ParquetSource - #[cfg(feature = "parquet")] - { - let source = ParquetSource::default(); - let source_with_adapter = source - .clone() - .with_schema_adapter_factory(factory.clone()) - .unwrap(); - - let base_source: Arc = source.into(); - assert!(base_source.schema_adapter_factory().is_none()); - assert!(source_with_adapter.schema_adapter_factory().is_some()); - - let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - format!("{:?}", retrieved_factory.as_ref()), - format!("{:?}", factory.as_ref()) - ); - } - - // Test CsvSource - { - let source = CsvSource::default(); - let source_with_adapter = source - .clone() - .with_schema_adapter_factory(factory.clone()) - .unwrap(); - - let base_source: Arc = source.into(); - assert!(base_source.schema_adapter_factory().is_none()); - assert!(source_with_adapter.schema_adapter_factory().is_some()); - - let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - format!("{:?}", retrieved_factory.as_ref()), - format!("{:?}", factory.as_ref()) - ); - } - - // Test JsonSource - { - let source = JsonSource::default(); - let source_with_adapter = source - .clone() - .with_schema_adapter_factory(factory.clone()) - .unwrap(); - - let base_source: Arc = source.into(); - assert!(base_source.schema_adapter_factory().is_none()); - assert!(source_with_adapter.schema_adapter_factory().is_some()); - - let retrieved_factory = source_with_adapter.schema_adapter_factory().unwrap(); - assert_eq!( - format!("{:?}", retrieved_factory.as_ref()), - format!("{:?}", factory.as_ref()) - ); - } - - Ok(()) -} diff --git a/datafusion/core/tests/sql/aggregates/basic.rs b/datafusion/core/tests/sql/aggregates/basic.rs index 4b421b5294e01..d1b376b735ab9 100644 --- a/datafusion/core/tests/sql/aggregates/basic.rs +++ b/datafusion/core/tests/sql/aggregates/basic.rs @@ -365,7 +365,7 @@ async fn count_distinct_dictionary_all_null_values() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +-----+---------------+ | cnt | count(t.num2) | +-----+---------------+ @@ -375,7 +375,7 @@ async fn count_distinct_dictionary_all_null_values() -> Result<()> { | 0 | 1 | | 0 | 1 | +-----+---------------+ - "### + " ); // Test with multiple partitions @@ -430,13 +430,13 @@ async fn count_distinct_dictionary_mixed_values() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------------------------+ | count(DISTINCT t.dict) | +------------------------+ | 2 | +------------------------+ - "### + " ); Ok(()) diff --git a/datafusion/core/tests/sql/aggregates/dict_nulls.rs b/datafusion/core/tests/sql/aggregates/dict_nulls.rs index da4b2c8d25c9d..f9e15a71a20f8 100644 --- a/datafusion/core/tests/sql/aggregates/dict_nulls.rs +++ b/datafusion/core/tests/sql/aggregates/dict_nulls.rs 
@@ -34,7 +34,7 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> { assert_snapshot!( batches_to_string(&results_count), - @r###" + @r" +----------------+-----+ | dict_null_keys | cnt | +----------------+-----+ @@ -42,7 +42,7 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> { | group_a | 2 | | group_b | 1 | +----------------+-----+ - "### + " ); // Test SUM null handling with extended data @@ -69,7 +69,7 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> { assert_snapshot!( batches_to_string(&results_min), - @r###" + @r" +----------------+---------+ | dict_null_keys | minimum | +----------------+---------+ @@ -78,7 +78,7 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> { | group_b | 1 | | group_c | 7 | +----------------+---------+ - "### + " ); // Test MEDIAN null handling with median data @@ -168,7 +168,7 @@ async fn test_first_last_value_order_by_null_handling() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +------------+-------+--------------------+---------------------+-------------------+--------------------+ | dict_group | value | first_ignore_nulls | first_respect_nulls | last_ignore_nulls | last_respect_nulls | +------------+-------+--------------------+---------------------+-------------------+--------------------+ @@ -178,7 +178,7 @@ async fn test_first_last_value_order_by_null_handling() -> Result<()> { | group_a | | 5 | | 20 | | | group_b | | 5 | | 20 | | +------------+-------+--------------------+---------------------+-------------------+--------------------+ - "### + " ); Ok(()) @@ -249,7 +249,7 @@ async fn test_first_last_value_group_by_dict_nulls() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +----------------+-----------+----------+-----+ | dict_null_keys | first_val | last_val | cnt | +----------------+-----------+----------+-----+ @@ -257,7 +257,7 @@ async fn test_first_last_value_group_by_dict_nulls() -> Result<()> { | group_a | 10 | 50 | 2 | | group_b | 30 | 30 | 1 | +----------------+-----------+----------+-----+ - "### + " ); // Test GROUP BY with null values in dictionary @@ -275,7 +275,7 @@ async fn test_first_last_value_group_by_dict_nulls() -> Result<()> { assert_snapshot!( batches_to_string(&results2), - @r###" + @r" +----------------+-----------+----------+-----+ | dict_null_vals | first_val | last_val | cnt | +----------------+-----------+----------+-----+ @@ -283,7 +283,7 @@ async fn test_first_last_value_group_by_dict_nulls() -> Result<()> { | val_x | 10 | 50 | 2 | | val_y | 30 | 30 | 1 | +----------------+-----------+----------+-----+ - "### + " ); Ok(()) @@ -394,7 +394,7 @@ async fn test_count_distinct_with_fuzz_table_dict_nulls() -> Result<()> { assert_snapshot!( batches_to_string(&results), - @r###" + @r" +--------+----------+---------------------+------+------+ | u8_low | utf8_low | dictionary_utf8_low | col1 | col2 | +--------+----------+---------------------+------+------+ @@ -405,7 +405,7 @@ async fn test_count_distinct_with_fuzz_table_dict_nulls() -> Result<()> { | 20 | text_e | | 0 | 1 | | 25 | text_f | group_gamma | 1 | 1 | +--------+----------+---------------------+------+------+ - "### + " ); Ok(()) diff --git a/datafusion/core/tests/sql/aggregates/mod.rs b/datafusion/core/tests/sql/aggregates/mod.rs index 321c158628e43..ede40d5c4ceca 100644 --- a/datafusion/core/tests/sql/aggregates/mod.rs +++ b/datafusion/core/tests/sql/aggregates/mod.rs @@ -20,15 +20,15 @@ use super::*; use arrow::{ array::{ 
- types::UInt32Type, Decimal128Array, DictionaryArray, DurationNanosecondArray, - Int32Array, LargeBinaryArray, StringArray, TimestampMicrosecondArray, - UInt16Array, UInt32Array, UInt64Array, UInt8Array, + Decimal128Array, DictionaryArray, DurationNanosecondArray, Int32Array, + LargeBinaryArray, StringArray, TimestampMicrosecondArray, UInt8Array, + UInt16Array, UInt32Array, UInt64Array, types::UInt32Type, }, datatypes::{DataType, Field, Schema, TimeUnit}, record_batch::RecordBatch, }; use datafusion::{ - common::{test_util::batches_to_string, Result}, + common::{Result, test_util::batches_to_string}, execution::{config::SessionConfig, context::SessionContext}, }; use datafusion_catalog::MemTable; @@ -959,8 +959,8 @@ impl FuzzTimestampTestData { } /// Sets up test contexts for fuzz table with timestamps and both single and multiple partitions -pub async fn setup_fuzz_timestamp_test_contexts( -) -> Result<(SessionContext, SessionContext)> { +pub async fn setup_fuzz_timestamp_test_contexts() +-> Result<(SessionContext, SessionContext)> { let test_data = FuzzTimestampTestData::new(); // Single partition context diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 26b71b5496f29..75cd78e47aff5 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -61,12 +61,9 @@ async fn explain_analyze_baseline_metrics() { assert_metrics!( &formatted, "AggregateExec: mode=Partial, gby=[]", - "metrics=[output_rows=3, elapsed_compute=" - ); - assert_metrics!( - &formatted, - "AggregateExec: mode=Partial, gby=[]", - "output_bytes=" + "metrics=[output_rows=3, elapsed_compute=", + "output_bytes=", + "output_batches=3" ); assert_metrics!( @@ -75,59 +72,68 @@ async fn explain_analyze_baseline_metrics() { "reduction_factor=5.1% (5/99)" ); - assert_metrics!( - &formatted, - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", - "metrics=[output_rows=5, elapsed_compute=" - ); - assert_metrics!( - &formatted, - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", - "output_bytes=" - ); - assert_metrics!( - &formatted, - "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", - "metrics=[output_rows=99, elapsed_compute=" - ); + { + let expected_batch_count_after_repartition = + if cfg!(not(feature = "force_hash_collisions")) { + "output_batches=3" + } else { + "output_batches=1" + }; + + assert_metrics!( + &formatted, + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "RepartitionExec: partitioning=Hash([c1@0], 3), input_partitions=3", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "ProjectionExec: expr=[]", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + } + assert_metrics!( &formatted, "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", - "output_bytes=" + "metrics=[output_rows=99, elapsed_compute=", + "output_bytes=", + "output_batches=1" ); + assert_metrics!( &formatted, "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", "selectivity=99% (99/100)" ); - assert_metrics!( - &formatted, - "ProjectionExec: expr=[]", - "metrics=[output_rows=5, elapsed_compute=" - ); - assert_metrics!(&formatted, "ProjectionExec: expr=[]", "output_bytes="); - assert_metrics!( - &formatted, - 
"CoalesceBatchesExec: target_batch_size=4096", - "metrics=[output_rows=5, elapsed_compute" - ); - assert_metrics!( - &formatted, - "CoalesceBatchesExec: target_batch_size=4096", - "output_bytes=" - ); + assert_metrics!( &formatted, "UnionExec", - "metrics=[output_rows=3, elapsed_compute=" + "metrics=[output_rows=3, elapsed_compute=", + "output_bytes=", + "output_batches=3" ); - assert_metrics!(&formatted, "UnionExec", "output_bytes="); + assert_metrics!( &formatted, "WindowAggExec", - "metrics=[output_rows=1, elapsed_compute=" + "metrics=[output_rows=1, elapsed_compute=", + "output_bytes=", + "output_batches=1" ); - assert_metrics!(&formatted, "WindowAggExec", "output_bytes="); fn expected_to_have_metrics(plan: &dyn ExecutionPlan) -> bool { use datafusion::physical_plan; @@ -228,9 +234,13 @@ async fn explain_analyze_level() { for (level, needle, should_contain) in [ (ExplainAnalyzeLevel::Summary, "spill_count", false), + (ExplainAnalyzeLevel::Summary, "output_batches", false), (ExplainAnalyzeLevel::Summary, "output_rows", true), + (ExplainAnalyzeLevel::Summary, "output_bytes", true), (ExplainAnalyzeLevel::Dev, "spill_count", true), (ExplainAnalyzeLevel::Dev, "output_rows", true), + (ExplainAnalyzeLevel::Dev, "output_bytes", true), + (ExplainAnalyzeLevel::Dev, "output_batches", true), ] { let plan = collect_plan(sql, level).await; assert_eq!( @@ -336,12 +346,12 @@ async fn csv_explain_plans() { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Explain Projection: aggregate_test_100.c1 Filter: aggregate_test_100.c2 > Int64(10) TableScan: aggregate_test_100 - "### + " ); // // verify the grahviz format of the plan @@ -407,13 +417,12 @@ async fn csv_explain_plans() { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Explain Projection: aggregate_test_100.c1 Filter: aggregate_test_100.c2 > Int8(10) TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] - - "### + " ); // // verify the grahviz format of the plan @@ -553,12 +562,12 @@ async fn csv_explain_verbose_plans() { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Explain Projection: aggregate_test_100.c1 Filter: aggregate_test_100.c2 > Int64(10) TableScan: aggregate_test_100 - "### + " ); // // verify the grahviz format of the plan @@ -624,12 +633,12 @@ async fn csv_explain_verbose_plans() { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Explain Projection: aggregate_test_100.c1 Filter: aggregate_test_100.c2 > Int8(10) TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] - "### + " ); // // verify the grahviz format of the plan @@ -748,19 +757,17 @@ async fn test_physical_plan_display_indent() { assert_snapshot!( actual, - @r###" + @r" SortPreservingMergeExec: [the_min@2 DESC], fetch=10 SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true] ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min] AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)] - CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 - AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)] - CoalesceBatchesExec: target_batch_size=4096 - FilterExec: c12@1 < 10 - RepartitionExec: 
partitioning=RoundRobinBatch(9000), input_partitions=1 - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1, c12], file_type=csv, has_header=true - "### + RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 + AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)] + FilterExec: c12@1 < 10 + RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1, c12], file_type=csv, has_header=true + " ); } @@ -794,19 +801,13 @@ async fn test_physical_plan_display_indent_multi_children() { assert_snapshot!( actual, - @r###" - CoalesceBatchesExec: target_batch_size=4096 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)], projection=[c1@0] - CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 - RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true - CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=9000 - RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 - ProjectionExec: expr=[c1@0 as c2] - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true - "### + @r" + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)], projection=[c1@0] + RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1@0 as c2], file_type=csv, has_header=true + " ); } @@ -845,8 +846,7 @@ async fn csv_explain_analyze_order_by() { // Ensure that the ordering is not optimized away from the plan // https://github.com/apache/datafusion/issues/6379 - let needle = - "SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[false], metrics=[output_rows=100, elapsed_compute"; + let needle = "SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[false], metrics=[output_rows=100, elapsed_compute"; assert_contains!(&formatted, needle); } @@ -872,6 +872,7 @@ async fn parquet_explain_analyze() { &formatted, "row_groups_pruned_statistics=1 total \u{2192} 1 matched" ); + assert_contains!(&formatted, "scan_efficiency_ratio=14%"); // The order of metrics is expected to be the same as the actual pruning order // (file-> row-group -> page) @@ -885,7 +886,7 @@ async fn parquet_explain_analyze() { (i_file < i_rowgroup_stat) && (i_rowgroup_stat < i_rowgroup_bloomfilter) && (i_rowgroup_bloomfilter < i_page), - "The parquet pruning metrics should be displayed in an order of: file range -> row group statistics -> row group bloom filter -> page index." + "The parquet pruning metrics should be displayed in an order of: file range -> row group statistics -> row group bloom filter -> page index." 
); } @@ -997,16 +998,14 @@ async fn parquet_recursive_projection_pushdown() -> Result<()> { RecursiveQueryExec: name=number_series, is_distinct=false CoalescePartitionsExec ProjectionExec: expr=[id@0 as id, 1 as level] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: id@0 = 1 - RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES), input_partitions=1 - DataSourceExec: file_groups={1 group: [[TMP_DIR/hierarchy.parquet]]}, projection=[id], file_type=parquet, predicate=id@0 = 1, pruning_predicate=id_null_count@2 != row_count@3 AND id_min@0 <= 1 AND 1 <= id_max@1, required_guarantees=[id in (1)] + FilterExec: id@0 = 1 + RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES), input_partitions=1 + DataSourceExec: file_groups={1 group: [[TMP_DIR/hierarchy.parquet]]}, projection=[id], file_type=parquet, predicate=id@0 = 1, pruning_predicate=id_null_count@2 != row_count@3 AND id_min@0 <= 1 AND 1 <= id_max@1, required_guarantees=[id in (1)] CoalescePartitionsExec ProjectionExec: expr=[id@0 + 1 as ns.id + Int64(1), level@1 + 1 as ns.level + Int64(1)] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: id@0 < 10 - RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES), input_partitions=1 - WorkTableExec: name=number_series + FilterExec: id@0 < 10 + RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES), input_partitions=1 + WorkTableExec: name=number_series " ); @@ -1082,11 +1081,11 @@ async fn explain_physical_plan_only() { assert_snapshot!( actual, - @r###" + @r" physical_plan ProjectionExec: expr=[2 as count(*)] PlaceholderRowExec - "### + " ); } @@ -1140,3 +1139,24 @@ async fn nested_loop_join_selectivity() { ); } } + +#[tokio::test] +async fn explain_analyze_hash_join() { + let sql = "EXPLAIN ANALYZE \ + SELECT * \ + FROM generate_series(10) as t1(a) \ + JOIN generate_series(20) as t2(b) \ + ON t1.a=t2.b"; + + for (level, needle, should_contain) in [ + (ExplainAnalyzeLevel::Summary, "probe_hit_rate", true), + (ExplainAnalyzeLevel::Summary, "avg_fanout", true), + ] { + let plan = collect_plan(sql, level).await; + assert_eq!( + plan.contains(needle), + should_contain, + "plan for level {level:?} unexpected content: {plan}" + ); + } +} diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 7a59834475920..7c0e89ee96418 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -38,14 +38,16 @@ async fn join_change_in_planner() -> Result<()> { Field::new("a2", DataType::UInt32, false), ])); // Specify the ordering: - let file_sort_order = vec![[col("a1")] - .into_iter() - .map(|e| { - let ascending = true; - let nulls_first = false; - e.sort(ascending, nulls_first) - }) - .collect::<Vec<_>>()]; + let file_sort_order = vec![ + [col("a1")] + .into_iter() + .map(|e| { + let ascending = true; + let nulls_first = false; + e.sort(ascending, nulls_first) + }) + .collect::<Vec<_>>(), + ]; register_unbounded_file_with_ordering( &ctx, schema.clone(), @@ -72,14 +74,10 @@ async fn join_change_in_planner() -> Result<()> { actual, @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10 - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, 
output_ordering=[a1@0 ASC NULLS LAST] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1, maintains_sort_order=true + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1, maintains_sort_order=true + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] " ); Ok(()) @@ -99,14 +97,16 @@ async fn join_no_order_on_filter() -> Result<()> { Field::new("a3", DataType::UInt32, false), ])); // Specify the ordering: - let file_sort_order = vec![[col("a1")] - .into_iter() - .map(|e| { - let ascending = true; - let nulls_first = false; - e.sort(ascending, nulls_first) - }) - .collect::<Vec<_>>()]; + let file_sort_order = vec![ + [col("a1")] + .into_iter() + .map(|e| { + let ascending = true; + let nulls_first = false; + e.sort(ascending, nulls_first) + }) + .collect::<Vec<_>>(), + ]; register_unbounded_file_with_ordering( &ctx, schema.clone(), @@ -133,14 +133,10 @@ async fn join_no_order_on_filter() -> Result<()> { actual, @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a3@0 AS Int64) > CAST(a3@1 AS Int64) + 3 AND CAST(a3@0 AS Int64) < CAST(a3@1 AS Int64) + 10 - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1, maintains_sort_order=true + StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1, maintains_sort_order=true + StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] " ); Ok(()) @@ -176,14 +172,10 @@ async fn join_change_in_planner_without_sort() -> Result<()> { actual, @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10 - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], 
infinite_source=true + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true " ); Ok(()) @@ -214,7 +206,10 @@ async fn join_change_in_planner_without_sort_not_allowed() -> Result<()> { match df.create_physical_plan().await { Ok(_) => panic!("Expecting error."), Err(e) => { - assert_eq!(e.strip_backtrace(), "SanityCheckPlan\ncaused by\nError during planning: Join operation cannot operate on a non-prunable stream without enabling the 'allow_symmetric_joins_without_pruning' configuration flag") + assert_eq!( + e.strip_backtrace(), + "SanityCheckPlan\ncaused by\nError during planning: Join operation cannot operate on a non-prunable stream without enabling the 'allow_symmetric_joins_without_pruning' configuration flag" + ) } } Ok(()) @@ -295,16 +290,12 @@ async fn unparse_cross_join() -> Result<()> { .await?; let unopt_sql = plan_to_sql(df.logical_plan())?; - assert_snapshot!(unopt_sql, @r#" - SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) - "#); + assert_snapshot!(unopt_sql, @"SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0)"); let optimized_plan = df.into_optimized_plan()?; let opt_sql = plan_to_sql(&optimized_plan)?; - assert_snapshot!(opt_sql, @r#" - SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) - "#); + assert_snapshot!(opt_sql, @"SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0)"); Ok(()) } diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 743c8750b5215..9a1dc5502ee60 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -24,10 +24,10 @@ use arrow::{ use datafusion::error::Result; use datafusion::logical_expr::{Aggregate, LogicalPlan, TableScan}; -use datafusion::physical_plan::collect; -use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlanVisitor; +use datafusion::physical_plan::collect; +use datafusion::physical_plan::metrics::MetricValue; use datafusion::prelude::*; use datafusion::test_util; use datafusion::{execution::context::SessionContext, physical_plan::displayable}; @@ -40,18 +40,24 @@ use std::io::Write; use std::path::PathBuf; use tempfile::TempDir; -/// A macro to assert that some particular line contains two substrings +/// A macro to assert that some particular line contains the given substrings /// -/// Usage: `assert_metrics!(actual, operator_name, metrics)` +/// Usage: `assert_metrics!(actual, operator_name, metrics_1, metrics_2, ...)` macro_rules! 
assert_metrics { - ($ACTUAL: expr, $OPERATOR_NAME: expr, $METRICS: expr) => { + ($ACTUAL: expr, $OPERATOR_NAME: expr, $($METRICS: expr),+) => { let found = $ACTUAL .lines() - .any(|line| line.contains($OPERATOR_NAME) && line.contains($METRICS)); + .any(|line| line.contains($OPERATOR_NAME) $( && line.contains($METRICS))+); + + let mut metrics = String::new(); + $(metrics.push_str(format!(" '{}',", $METRICS).as_str());)+ + // remove the last `,` from the string + metrics.pop(); + assert!( found, - "Can not find a line with both '{}' and '{}' in\n\n{}", - $OPERATOR_NAME, $METRICS, $ACTUAL + "Cannot find a line with operator name '{}' and metrics containing values {} in :\n\n{}", + $OPERATOR_NAME, metrics, $ACTUAL ); }; } @@ -64,6 +70,7 @@ mod path_partition; mod runtime_config; pub mod select; mod sql_api; +mod unparser; async fn register_aggregate_csv_by_sql(ctx: &SessionContext) { let testdata = test_util::arrow_test_data(); @@ -329,8 +336,7 @@ async fn nyc() -> Result<()> { match &optimized_plan { LogicalPlan::Aggregate(Aggregate { input, .. }) => match input.as_ref() { LogicalPlan::TableScan(TableScan { - ref projected_schema, - .. + projected_schema, .. }) => { assert_eq!(2, projected_schema.fields().len()); assert_eq!(projected_schema.field(0).name(), "passenger_count"); diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 05cc723ef05fb..c6f920584dc2b 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -31,14 +31,13 @@ use datafusion::{ listing::{ListingOptions, ListingTable, ListingTableConfig}, }, error::Result, - physical_plan::ColumnStatistics, prelude::SessionContext, test_util::{self, arrow_test_data, parquet_test_data}, }; use datafusion_catalog::TableProvider; +use datafusion_common::ScalarValue; use datafusion_common::stats::Precision; use datafusion_common::test_util::batches_to_sort_string; -use datafusion_common::ScalarValue; use datafusion_execution::config::SessionConfig; use async_trait::async_trait; @@ -46,11 +45,11 @@ use bytes::Bytes; use chrono::{TimeZone, Utc}; use futures::stream::{self, BoxStream}; use insta::assert_snapshot; +use object_store::{Attributes, MultipartUpload, PutMultipartOptions, PutPayload}; use object_store::{ - path::Path, GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, - ObjectStore, PutOptions, PutResult, + GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, ObjectStore, + PutOptions, PutResult, path::Path, }; -use object_store::{Attributes, MultipartUpload, PutMultipartOptions, PutPayload}; use url::Url; #[tokio::test] @@ -464,10 +463,19 @@ async fn parquet_statistics() -> Result<()> { assert_eq!(stat_cols.len(), 4); // stats for the first col are read from the parquet file assert_eq!(stat_cols[0].null_count, Precision::Exact(3)); - // TODO assert partition column (1,2,3) stats once implemented (#1186) - assert_eq!(stat_cols[1], ColumnStatistics::new_unknown(),); - assert_eq!(stat_cols[2], ColumnStatistics::new_unknown(),); - assert_eq!(stat_cols[3], ColumnStatistics::new_unknown(),); + // Partition column statistics (year=2021 for all 3 rows) + assert_eq!(stat_cols[1].null_count, Precision::Exact(0)); + assert_eq!( + stat_cols[1].min_value, + Precision::Exact(ScalarValue::Int32(Some(2021))) + ); + assert_eq!( + stat_cols[1].max_value, + Precision::Exact(ScalarValue::Int32(Some(2021))) + ); + // month and day are Utf8 partition columns with statistics + assert_eq!(stat_cols[2].null_count, 
Precision::Exact(0)); + assert_eq!(stat_cols[3].null_count, Precision::Exact(0)); //// WITH PROJECTION //// let dataframe = ctx.sql("SELECT mycol, day FROM t WHERE day='28'").await?; @@ -479,8 +487,16 @@ async fn parquet_statistics() -> Result<()> { assert_eq!(stat_cols.len(), 2); // stats for the first col are read from the parquet file assert_eq!(stat_cols[0].null_count, Precision::Exact(1)); - // TODO assert partition column stats once implemented (#1186) - assert_eq!(stat_cols[1], ColumnStatistics::new_unknown()); + // Partition column statistics for day='28' (1 row) + assert_eq!(stat_cols[1].null_count, Precision::Exact(0)); + assert_eq!( + stat_cols[1].min_value, + Precision::Exact(ScalarValue::Utf8(Some("28".to_string()))) + ); + assert_eq!( + stat_cols[1].max_value, + Precision::Exact(ScalarValue::Utf8(Some("28".to_string()))) + ); Ok(()) } diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 9627d7bccdb04..d85892c254570 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -18,9 +18,14 @@ //! Tests for runtime configuration SQL interface use std::sync::Arc; +use std::time::Duration; use datafusion::execution::context::SessionContext; use datafusion::execution::context::TaskContext; +use datafusion::prelude::SessionConfig; +use datafusion_execution::cache::DefaultListFilesCache; +use datafusion_execution::cache::cache_manager::CacheManagerConfig; +use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_physical_plan::common::collect; #[tokio::test] @@ -233,6 +238,93 @@ async fn test_test_metadata_cache_limit() { assert_eq!(get_limit(&ctx), 123 * 1024); } +#[tokio::test] +async fn test_list_files_cache_limit() { + let list_files_cache = Arc::new(DefaultListFilesCache::default()); + + let rt = RuntimeEnvBuilder::new() + .with_cache_manager( + CacheManagerConfig::default().with_list_files_cache(Some(list_files_cache)), + ) + .build_arc() + .unwrap(); + + let ctx = SessionContext::new_with_config_rt(SessionConfig::default(), rt); + + let update_limit = async |ctx: &SessionContext, limit: &str| { + ctx.sql( + format!("SET datafusion.runtime.list_files_cache_limit = '{limit}'").as_str(), + ) + .await + .unwrap() + .collect() + .await + .unwrap(); + }; + + let get_limit = |ctx: &SessionContext| -> usize { + ctx.task_ctx() + .runtime_env() + .cache_manager + .get_list_files_cache() + .unwrap() + .cache_limit() + }; + + update_limit(&ctx, "100M").await; + assert_eq!(get_limit(&ctx), 100 * 1024 * 1024); + + update_limit(&ctx, "2G").await; + assert_eq!(get_limit(&ctx), 2 * 1024 * 1024 * 1024); + + update_limit(&ctx, "123K").await; + assert_eq!(get_limit(&ctx), 123 * 1024); +} + +#[tokio::test] +async fn test_list_files_cache_ttl() { + let list_files_cache = Arc::new(DefaultListFilesCache::default()); + + let rt = RuntimeEnvBuilder::new() + .with_cache_manager( + CacheManagerConfig::default().with_list_files_cache(Some(list_files_cache)), + ) + .build_arc() + .unwrap(); + + let ctx = SessionContext::new_with_config_rt(SessionConfig::default(), rt); + + let update_limit = async |ctx: &SessionContext, limit: &str| { + ctx.sql( + format!("SET datafusion.runtime.list_files_cache_ttl = '{limit}'").as_str(), + ) + .await + .unwrap() + .collect() + .await + .unwrap(); + }; + + let get_limit = |ctx: &SessionContext| -> Duration { + ctx.task_ctx() + .runtime_env() + .cache_manager + .get_list_files_cache() + .unwrap() + .cache_ttl() + .unwrap() + }; + + update_limit(&ctx, 
"1m").await; + assert_eq!(get_limit(&ctx), Duration::from_secs(60)); + + update_limit(&ctx, "30s").await; + assert_eq!(get_limit(&ctx), Duration::from_secs(30)); + + update_limit(&ctx, "1m30s").await; + assert_eq!(get_limit(&ctx), Duration::from_secs(90)); +} + #[tokio::test] async fn test_unknown_runtime_config() { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 8a0f620627384..6126793145efd 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -18,8 +18,7 @@ use std::collections::HashMap; use super::*; -use datafusion::assert_batches_eq; -use datafusion_common::{metadata::ScalarAndMetadata, ParamValues, ScalarValue}; +use datafusion_common::{ParamValues, ScalarValue, metadata::ScalarAndMetadata}; use insta::assert_snapshot; #[tokio::test] @@ -223,10 +222,10 @@ async fn test_parameter_invalid_types() -> Result<()> { .await; assert_snapshot!(results.unwrap_err().strip_backtrace(), @r" - type_coercion - caused by - Error during planning: Cannot infer common argument type for comparison operation List(nullable Int32) = Int32 - "); + type_coercion + caused by + Error during planning: Cannot infer common argument type for comparison operation List(Int32) = Int32 + "); Ok(()) } @@ -343,26 +342,20 @@ async fn test_query_parameters_with_metadata() -> Result<()> { ])) .unwrap(); - // df_with_params_replaced.schema() is not correct here - // https://github.com/apache/datafusion/issues/18102 - let batches = df_with_params_replaced.clone().collect().await.unwrap(); - let schema = batches[0].schema(); - + let schema = df_with_params_replaced.schema(); assert_eq!(schema.field(0).data_type(), &DataType::UInt32); assert_eq!(schema.field(0).metadata(), &metadata1); assert_eq!(schema.field(1).data_type(), &DataType::Utf8); assert_eq!(schema.field(1).metadata(), &metadata2); - assert_batches_eq!( - [ - "+----+-----+", - "| $1 | $2 |", - "+----+-----+", - "| 1 | two |", - "+----+-----+", - ], - &batches - ); + let batches = df_with_params_replaced.collect().await.unwrap(); + assert_snapshot!(batches_to_sort_string(&batches), @r" + +----+-----+ + | $1 | $2 | + +----+-----+ + | 1 | two | + +----+-----+ + "); Ok(()) } @@ -421,3 +414,20 @@ async fn test_select_no_projection() -> Result<()> { "); Ok(()) } + +#[tokio::test] +async fn test_select_cast_date_literal_to_timestamp_overflow() -> Result<()> { + let ctx = SessionContext::new(); + let err = ctx + .sql("SELECT CAST(DATE '9999-12-31' AS TIMESTAMP)") + .await? + .collect() + .await + .unwrap_err(); + + assert_contains!( + err.to_string(), + "Cannot cast Date32 value 2932896 to Timestamp(ns): converted value exceeds the representable i64 range" + ); + Ok(()) +} diff --git a/datafusion/core/tests/sql/unparser.rs b/datafusion/core/tests/sql/unparser.rs new file mode 100644 index 0000000000000..8b56bf67a261c --- /dev/null +++ b/datafusion/core/tests/sql/unparser.rs @@ -0,0 +1,462 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! SQL Unparser Roundtrip Integration Tests +//! +//! This module tests the [`Unparser`] by running queries through a complete roundtrip: +//! the original SQL is parsed into a logical plan, unparsed back to SQL, then that +//! generated SQL is parsed and executed. The results are compared to verify semantic +//! equivalence. +//! +//! ## Test Strategy +//! +//! Uses real-world benchmark queries (TPC-H and Clickbench) to validate that: +//! 1. The unparser produces syntactically valid SQL +//! 2. The unparsed SQL is semantically equivalent (produces identical results) +//! +//! ## Query Suites +//! +//! - **TPC-H**: Standard decision-support benchmark with 22 complex analytical queries +//! - **Clickbench**: Web analytics benchmark with 43 queries against a denormalized schema +//! +//! [`Unparser`]: datafusion_sql::unparser::Unparser + +use std::fs::ReadDir; +use std::future::Future; + +use arrow::array::RecordBatch; +use datafusion::common::Result; +use datafusion::prelude::{ParquetReadOptions, SessionContext}; +use datafusion_common::Column; +use datafusion_expr::Expr; +use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_sql::unparser::Unparser; +use datafusion_sql::unparser::dialect::DefaultDialect; +use itertools::Itertools; + +/// Paths to benchmark query files (supports running from repo root or different working directories). +const BENCHMARK_PATHS: &[&str] = &["../../benchmarks/", "./benchmarks/"]; + +/// Reads all `.sql` files from a directory and converts them to test queries. +/// +/// Skips files that: +/// - Are not regular files +/// - Don't have a `.sql` extension +/// - Contain multiple SQL statements (indicated by `;\n`) +/// +/// Multi-statement files are skipped because the unparser doesn't support +/// DDL statements like `CREATE VIEW` that appear in multi-statement Clickbench queries. +fn iterate_queries(dir: ReadDir) -> Vec<TestQuery> { + let mut queries = vec![]; + for entry in dir.flatten() { + let Ok(file_type) = entry.file_type() else { + continue; + }; + if !file_type.is_file() { + continue; + } + let path = entry.path(); + let Some(ext) = path.extension() else { + continue; + }; + if ext != "sql" { + continue; + } + let name = path.file_stem().unwrap().to_string_lossy().to_string(); + if let Ok(mut contents) = std::fs::read_to_string(entry.path()) { + // If the query contains ;\n it has DDL statements like CREATE VIEW which the unparser doesn't support; skip it + contents = contents.trim().to_string(); + if contents.contains(";\n") { + println!("Skipping query with multiple statements: {name}"); + continue; + } + queries.push(TestQuery { + sql: contents, + name, + }); + } + } + queries +} + +/// A SQL query loaded from a benchmark file for roundtrip testing. +/// +/// Each query is identified by its filename (without extension) and contains +/// the full SQL text to be tested. +struct TestQuery { + /// The SQL query text to test. + sql: String, + /// The query identifier (typically the filename without .sql extension). + name: String, +} + +/// Collect SQL for Clickbench queries. 
+fn clickbench_queries() -> Vec<TestQuery> { + let mut queries = vec![]; + for path in BENCHMARK_PATHS { + let dir = format!("{path}queries/clickbench/queries/"); + println!("Reading Clickbench queries from {dir}"); + if let Ok(dir) = std::fs::read_dir(dir) { + let read = iterate_queries(dir); + println!("Found {} Clickbench queries", read.len()); + queries.extend(read); + } + } + queries.sort_unstable_by_key(|q| { + q.name + .split('q') + .next_back() + .and_then(|num| num.parse::<usize>().ok()) + }); + queries +} + +/// Collect SQL for TPC-H queries. +fn tpch_queries() -> Vec<TestQuery> { + let mut queries = vec![]; + for path in BENCHMARK_PATHS { + let dir = format!("{path}queries/"); + println!("Reading TPC-H queries from {dir}"); + if let Ok(dir) = std::fs::read_dir(dir) { + let read = iterate_queries(dir); + queries.extend(read); + } + } + println!("Total TPC-H queries found: {}", queries.len()); + queries.sort_unstable_by_key(|q| q.name.clone()); + queries +} + +/// Create a new SessionContext for testing that has all Clickbench tables registered. +async fn clickbench_test_context() -> Result<SessionContext> { + let ctx = SessionContext::new(); + ctx.register_parquet( + "hits", + "tests/data/clickbench_hits_10.parquet", + ParquetReadOptions::default(), + ) + .await?; + // Sanity check we found the table by querying its schema, it should not be empty + // Otherwise if the path is wrong the tests will all fail in confusing ways + let df = ctx.sql("SELECT * FROM hits LIMIT 1").await?; + assert!( + !df.schema().fields().is_empty(), + "Clickbench 'hits' table not registered correctly" + ); + Ok(ctx) +} + +/// Create a new SessionContext for testing that has all TPC-H tables registered. +async fn tpch_test_context() -> Result<SessionContext> { + let ctx = SessionContext::new(); + let data_dir = "tests/data/"; + // All tables have the pattern "tpch_<table>_small.parquet" + for table in [ + "customer", "lineitem", "nation", "orders", "part", "partsupp", "region", + "supplier", + ] { + let path = format!("{data_dir}tpch_{table}_small.parquet"); + ctx.register_parquet(table, &path, ParquetReadOptions::default()) + .await?; + // Sanity check we found the table by querying its schema, it should not be empty + // Otherwise if the path is wrong the tests will all fail in confusing ways + let df = ctx.sql(&format!("SELECT * FROM {table} LIMIT 1")).await?; + assert!( + !df.schema().fields().is_empty(), + "TPC-H '{table}' table not registered correctly" + ); + } + Ok(ctx) +} + +/// Sorts record batches by all columns for deterministic comparison. +/// +/// When comparing query results, we need a canonical ordering so that +/// semantically equivalent results compare as equal. This function sorts +/// by all columns in the schema to achieve that. +async fn sort_batches( + ctx: &SessionContext, + batches: Vec<RecordBatch>, +) -> Result<Vec<RecordBatch>> { + let mut df = ctx.read_batches(batches)?; + let schema = df.schema().as_arrow().clone(); + let sort_exprs = schema + .fields() + .iter() + // Use Column directly, col() causes the column names to be normalized to lowercase + .map(|f| { + Expr::Column(Column::new_unqualified(f.name().to_string())).sort(true, false) + }) + .collect_vec(); + if !sort_exprs.is_empty() { + df = df.sort(sort_exprs)?; + } + df.collect().await +} + +/// The outcome of running a single roundtrip test. +/// +/// A successful test produces [`TestCaseResult::Success`]. +/// All other variants capture different failure modes with enough context to diagnose the issue. +enum TestCaseResult { + /// The unparsed SQL produced identical results to the original. 
+ Success, + + /// Both queries executed but produced different results. + /// + /// This indicates a semantic bug in the unparser where the generated SQL + /// has different meaning than the original. + ResultsMismatch { original: String, unparsed: String }, + + /// The unparser failed to convert the logical plan to SQL. + /// + /// This may indicate an unsupported SQL feature or a bug in the unparser. + UnparseError { original: String, error: String }, + + /// The original SQL failed to execute. + /// + /// This indicates a problem with the test setup (missing tables, + /// invalid test data) rather than an unparser issue. + ExecutionError { original: String, error: String }, + + /// The unparsed SQL failed to execute, even though the original succeeded. + /// + /// This indicates the unparser generated syntactically invalid SQL or SQL + /// that references non-existent columns/tables. + UnparsedExecutionError { + original: String, + unparsed: String, + error: String, + }, +} + +impl TestCaseResult { + /// Returns true if the test case represents a failure + /// (anything other than [`TestCaseResult::Success`]). + fn is_failure(&self) -> bool { + !matches!(self, TestCaseResult::Success) + } + + /// Formats a detailed error message for the test case into a string. + fn format_error(&self, name: &str) -> String { + match self { + TestCaseResult::Success => String::new(), + TestCaseResult::ResultsMismatch { original, unparsed } => { + format!( + "Results mismatch for {name}.\nOriginal SQL:\n{original}\n\nUnparsed SQL:\n{unparsed}" + ) + } + TestCaseResult::UnparseError { original, error } => { + format!("Unparse error for {name}: {error}\nOriginal SQL:\n{original}") + } + TestCaseResult::ExecutionError { original, error } => { + format!("Execution error for {name}: {error}\nOriginal SQL:\n{original}") + } + TestCaseResult::UnparsedExecutionError { + original, + unparsed, + error, + } => { + format!( + "Unparsed execution error for {name}: {error}\nOriginal SQL:\n{original}\n\nUnparsed SQL:\n{unparsed}" + ) + } + } + } +} + +/// Executes a roundtrip test for a single SQL query. +/// +/// This is the core test logic that: +/// 1. Parses the original SQL and creates a logical plan +/// 2. Unparses the logical plan back to SQL +/// 3. Executes both the original and unparsed queries +/// 4. Compares the results (sorting if the query has no ORDER BY) +/// +/// This always uses [`DefaultDialect`] for unparsing. +/// +/// # Arguments +/// +/// * `ctx` - Session context with tables registered +/// * `original` - The original SQL query to test +/// +/// # Returns +/// +/// A [`TestCaseResult`] indicating success or the specific failure mode. 
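+/// # Example
+///
+/// Illustrative sketch only (not compiled as a doctest); it uses just the helpers
+/// defined in this module and assumes the TPC-H test data registered by
+/// [`tpch_test_context`] is available:
+///
+/// ```ignore
+/// let ctx = tpch_test_context().await?;
+/// // Any single-statement query works here; this one is only an example.
+/// let outcome = collect_results(&ctx, "SELECT count(*) FROM lineitem").await;
+/// if outcome.is_failure() {
+///     panic!("{}", outcome.format_error("example"));
+/// }
+/// ```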
+async fn collect_results(ctx: &SessionContext, original: &str) -> TestCaseResult { + let unparser = Unparser::new(&DefaultDialect {}); + + // Parse and create logical plan from original SQL + let df = match ctx.sql(original).await { + Ok(df) => df, + Err(e) => { + return TestCaseResult::ExecutionError { + original: original.to_string(), + error: e.to_string(), + }; + } + }; + + // Unparse the logical plan back to SQL + let unparsed = match unparser.plan_to_sql(df.logical_plan()) { + Ok(sql) => format!("{sql:#}"), + Err(e) => { + return TestCaseResult::UnparseError { + original: original.to_string(), + error: e.to_string(), + }; + } + }; + + let is_sorted = match ctx.state().create_physical_plan(df.logical_plan()).await { + Ok(plan) => plan.equivalence_properties().output_ordering().is_some(), + Err(e) => { + return TestCaseResult::ExecutionError { + original: original.to_string(), + error: e.to_string(), + }; + } + }; + + // Collect results from original query + let mut expected = match df.collect().await { + Ok(batches) => batches, + Err(e) => { + return TestCaseResult::ExecutionError { + original: original.to_string(), + error: e.to_string(), + }; + } + }; + + // Parse and execute the unparsed SQL + let actual_df = match ctx.sql(&unparsed).await { + Ok(df) => df, + Err(e) => { + return TestCaseResult::UnparsedExecutionError { + original: original.to_string(), + unparsed, + error: e.to_string(), + }; + } + }; + + // Collect results from unparsed query + let mut actual = match actual_df.collect().await { + Ok(batches) => batches, + Err(e) => { + return TestCaseResult::UnparsedExecutionError { + original: original.to_string(), + unparsed, + error: e.to_string(), + }; + } + }; + + // Sort if needed for comparison + if !is_sorted { + expected = match sort_batches(ctx, expected).await { + Ok(batches) => batches, + Err(e) => { + return TestCaseResult::ExecutionError { + original: original.to_string(), + error: format!("Failed to sort expected results: {e}"), + }; + } + }; + actual = match sort_batches(ctx, actual).await { + Ok(batches) => batches, + Err(e) => { + return TestCaseResult::UnparsedExecutionError { + original: original.to_string(), + unparsed, + error: format!("Failed to sort actual results: {e}"), + }; + } + }; + } + + if expected != actual { + TestCaseResult::ResultsMismatch { + original: original.to_string(), + unparsed, + } + } else { + TestCaseResult::Success + } +} + +/// Runs roundtrip tests for a collection of queries and reports results. +/// +/// Iterates through all queries, running each through [`collect_results`]. +/// Prints colored status (green checkmark for success, red X for failure) +/// and panics at the end if any tests failed, with detailed error messages. +/// +/// # Type Parameters +/// +/// * `F` - Factory function that creates fresh session contexts +/// * `Fut` - Future type returned by the context factory +/// +/// # Panics +/// +/// Panics if any query fails the roundtrip test, displaying all failures. 
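+/// # Example
+///
+/// Minimal sketch of wiring a query suite into this runner; it mirrors the
+/// TPC-H test at the bottom of this file and uses only helpers defined above:
+///
+/// ```ignore
+/// #[tokio::test]
+/// async fn my_roundtrip_suite() {
+///     // "TPC-H" is just the label used when reporting failures.
+///     run_roundtrip_tests("TPC-H", tpch_queries(), tpch_test_context).await;
+/// }
+/// ```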
+async fn run_roundtrip_tests<F, Fut>( + suite_name: &str, + queries: Vec<TestQuery>, + create_context: F, +) where + F: Fn() -> Fut, + Fut: Future<Output = Result<SessionContext>>, +{ + let mut errors: Vec<String> = vec![]; + for sql in queries { + let ctx = match create_context().await { + Ok(ctx) => ctx, + Err(e) => { + println!("\x1b[31m✗\x1b[0m {} query: {}", suite_name, sql.name); + errors.push(format!("Failed to create context for {}: {}", sql.name, e)); + continue; + } + }; + let result = collect_results(&ctx, &sql.sql).await; + if result.is_failure() { + println!("\x1b[31m✗\x1b[0m {} query: {}", suite_name, sql.name); + errors.push(result.format_error(&sql.name)); + } else { + println!("\x1b[32m✓\x1b[0m {} query: {}", suite_name, sql.name); + } + } + if !errors.is_empty() { + panic!( + "{} {} test(s) failed:\n\n{}", + errors.len(), + suite_name, + errors.join("\n\n---\n\n") + ); + } +} + +#[tokio::test] +async fn test_clickbench_unparser_roundtrip() { + run_roundtrip_tests("Clickbench", clickbench_queries(), clickbench_test_context) + .await; +} + +#[tokio::test] +async fn test_tpch_unparser_roundtrip() { + run_roundtrip_tests("TPC-H", tpch_queries(), tpch_test_context).await; +} diff --git a/datafusion/core/tests/tpc-ds/30.sql b/datafusion/core/tests/tpc-ds/30.sql index 78f34b807e5b5..80624f49006a9 100644 --- a/datafusion/core/tests/tpc-ds/30.sql +++ b/datafusion/core/tests/tpc-ds/30.sql @@ -14,7 +14,7 @@ with customer_total_return as ,ca_state) select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date_sk,ctr_total_return + ,c_last_review_date,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer @@ -26,7 +26,7 @@ with customer_total_return as and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date_sk,ctr_total_return + ,c_last_review_date,ctr_total_return limit 100; diff --git a/datafusion/core/tests/tpcds_planning.rs b/datafusion/core/tests/tpcds_planning.rs index 252d76d0f9d92..3ad74962bc2c0 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ b/datafusion/core/tests/tpcds_planning.rs @@ -1052,9 +1052,12 @@ async fn regression_test(query_no: u8, create_physical: bool) -> Result<()> { for sql in &sql { let df = ctx.sql(sql).await?; let (state, plan) = df.into_parts(); - let plan = state.optimize(&plan)?; if create_physical { let _ = state.create_physical_plan(&plan).await?; + } else { + // Run the logical optimizer even if we are not creating the physical plan + // to ensure it will properly succeed + let _ = state.optimize(&plan)?; } } diff --git a/datafusion/core/tests/tracing/asserting_tracer.rs b/datafusion/core/tests/tracing/asserting_tracer.rs index 292e066e5f121..700f9f3308466 100644 --- a/datafusion/core/tests/tracing/asserting_tracer.rs +++ b/datafusion/core/tests/tracing/asserting_tracer.rs @@ -21,7 +21,7 @@ use std::ops::Deref; use std::sync::{Arc, LazyLock}; use datafusion_common::{HashMap, HashSet}; -use datafusion_common_runtime::{set_join_set_tracer, JoinSetTracer}; +use datafusion_common_runtime::{JoinSetTracer, set_join_set_tracer}; use futures::future::BoxFuture; use tokio::sync::{Mutex, MutexGuard}; diff --git a/datafusion/core/tests/tracing/traceable_object_store.rs b/datafusion/core/tests/tracing/traceable_object_store.rs index 60ef1cc5d6b6a..00aa4ea3f36d9 --- 
a/datafusion/core/tests/tracing/traceable_object_store.rs +++ b/datafusion/core/tests/tracing/traceable_object_store.rs @@ -20,8 +20,8 @@ use crate::tracing::asserting_tracer::assert_traceability; use futures::stream::BoxStream; use object_store::{ - path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, - ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, + GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, path::Path, }; use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; diff --git a/datafusion/core/tests/user_defined/expr_planner.rs b/datafusion/core/tests/user_defined/expr_planner.rs index 07d289cab06c2..c5e5af731359f 100644 --- a/datafusion/core/tests/user_defined/expr_planner.rs +++ b/datafusion/core/tests/user_defined/expr_planner.rs @@ -26,9 +26,9 @@ use datafusion::logical_expr::Operator; use datafusion::prelude::*; use datafusion::sql::sqlparser::ast::BinaryOperator; use datafusion_common::ScalarValue; +use datafusion_expr::BinaryExpr; use datafusion_expr::expr::Alias; use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr}; -use datafusion_expr::BinaryExpr; #[derive(Debug)] struct MyCustomPlanner; @@ -77,25 +77,25 @@ async fn plan_and_collect(sql: &str) -> Result<Vec<RecordBatch>> { #[tokio::test] async fn test_custom_operators_arrow() { let actual = plan_and_collect("select 'foo'->'bar';").await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r#" +----------------------------+ | Utf8("foo") || Utf8("bar") | +----------------------------+ | foobar | +----------------------------+ - "###); + "#); } #[tokio::test] async fn test_custom_operators_long_arrow() { let actual = plan_and_collect("select 1->>2;").await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +---------------------+ | Int64(1) + Int64(2) | +---------------------+ | 3 | +---------------------+ - "###); + "); } #[tokio::test] @@ -103,13 +103,13 @@ async fn test_question_select() { let actual = plan_and_collect("select a ? 2 from (select 1 as a);") .await .unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +--------------+ | a ? Int64(2) | +--------------+ | true | +--------------+ - "###); + "); } #[tokio::test] @@ -117,11 +117,11 @@ async fn test_question_filter() { let actual = plan_and_collect("select a from (select 1 as a) where a ? 
2;") .await .unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +---+ | a | +---+ | 1 | +---+ - "###); + "); } diff --git a/datafusion/core/tests/user_defined/insert_operation.rs b/datafusion/core/tests/user_defined/insert_operation.rs index e0a3e98604ae4..7ad00dece1b24 100644 --- a/datafusion/core/tests/user_defined/insert_operation.rs +++ b/datafusion/core/tests/user_defined/insert_operation.rs @@ -25,12 +25,12 @@ use datafusion::{ }; use datafusion_catalog::{Session, TableProvider}; use datafusion_common::config::Dialect; -use datafusion_expr::{dml::InsertOp, Expr, TableType}; +use datafusion_expr::{Expr, TableType, dml::InsertOp}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_plan::execution_plan::SchedulingType; use datafusion_physical_plan::{ - execution_plan::{Boundedness, EmissionType}, DisplayAs, ExecutionPlan, PlanProperties, + execution_plan::{Boundedness, EmissionType}, }; #[tokio::test] diff --git a/datafusion/core/tests/user_defined/mod.rs b/datafusion/core/tests/user_defined/mod.rs index 5d84cdb692830..bc9949f5d681c 100644 --- a/datafusion/core/tests/user_defined/mod.rs +++ b/datafusion/core/tests/user_defined/mod.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +/// Tests for user defined Async Scalar functions +mod user_defined_async_scalar_functions; + /// Tests for user defined Scalar functions mod user_defined_scalar_functions; @@ -33,5 +36,8 @@ mod user_defined_table_functions; /// Tests for Expression Planner mod expr_planner; +/// Tests for Relation Planner extensions +mod relation_planner; + /// Tests for insert operations mod insert_operation; diff --git a/datafusion/core/tests/user_defined/relation_planner.rs b/datafusion/core/tests/user_defined/relation_planner.rs new file mode 100644 index 0000000000000..bda9b37ebea68 --- /dev/null +++ b/datafusion/core/tests/user_defined/relation_planner.rs @@ -0,0 +1,527 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Tests for the RelationPlanner extension point + +use std::sync::Arc; + +use arrow::array::{Int64Array, RecordBatch, StringArray}; +use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::catalog::memory::MemTable; +use datafusion::common::test_util::batches_to_string; +use datafusion::prelude::*; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::Expr; +use datafusion_expr::logical_plan::builder::LogicalPlanBuilder; +use datafusion_expr::planner::{ + PlannedRelation, RelationPlanner, RelationPlannerContext, RelationPlanning, +}; +use datafusion_sql::sqlparser::ast::TableFactor; +use insta::assert_snapshot; + +// ============================================================================ +// Test Planners - Example Implementations +// ============================================================================ + +// The planners in this section are deliberately minimal, static examples used +// only for tests. In real applications a `RelationPlanner` would typically +// construct richer logical plans tailored to external systems or custom +// semantics rather than hard-coded in-memory tables. +// +// For more realistic examples, see `datafusion-examples/examples/relation_planner/`: +// - `table_sample.rs`: Full TABLESAMPLE implementation (parsing → execution) +// - `pivot_unpivot.rs`: PIVOT/UNPIVOT via SQL rewriting +// - `match_recognize.rs`: MATCH_RECOGNIZE logical planning + +/// Helper to build simple static values-backed virtual tables used by the +/// example planners below. +fn plan_static_values_table( + relation: TableFactor, + table_name: &str, + column_name: &str, + values: Vec, +) -> Result { + match relation { + TableFactor::Table { name, alias, .. } + if name.to_string().eq_ignore_ascii_case(table_name) => + { + let rows = values + .into_iter() + .map(|v| vec![Expr::Literal(v, None)]) + .collect::>(); + + let plan = LogicalPlanBuilder::values(rows)? + .project(vec![col("column1").alias(column_name)])? + .build()?; + + Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) + } + other => Ok(RelationPlanning::Original(other)), + } +} + +/// Example planner that provides a virtual `numbers` table with values +/// 1, 2, 3. +#[derive(Debug)] +struct NumbersPlanner; + +impl RelationPlanner for NumbersPlanner { + fn plan_relation( + &self, + relation: TableFactor, + _context: &mut dyn RelationPlannerContext, + ) -> Result { + plan_static_values_table( + relation, + "numbers", + "number", + vec![ + ScalarValue::Int64(Some(1)), + ScalarValue::Int64(Some(2)), + ScalarValue::Int64(Some(3)), + ], + ) + } +} + +/// Example planner that provides a virtual `colors` table with three string +/// values: `red`, `green`, `blue`. +#[derive(Debug)] +struct ColorsPlanner; + +impl RelationPlanner for ColorsPlanner { + fn plan_relation( + &self, + relation: TableFactor, + _context: &mut dyn RelationPlannerContext, + ) -> Result { + plan_static_values_table( + relation, + "colors", + "color", + vec![ + ScalarValue::Utf8(Some("red".into())), + ScalarValue::Utf8(Some("green".into())), + ScalarValue::Utf8(Some("blue".into())), + ], + ) + } +} + +/// Alternative implementation of `numbers` (returns 100, 200) used to +/// demonstrate planner precedence (last registered planner wins). 
+#[derive(Debug)] +struct AlternativeNumbersPlanner; + +impl RelationPlanner for AlternativeNumbersPlanner { + fn plan_relation( + &self, + relation: TableFactor, + _context: &mut dyn RelationPlannerContext, + ) -> Result { + plan_static_values_table( + relation, + "numbers", + "number", + vec![ScalarValue::Int64(Some(100)), ScalarValue::Int64(Some(200))], + ) + } +} + +/// Example planner that intercepts nested joins and samples both sides (limit 2) +/// before joining, demonstrating recursive planning with `context.plan()`. +#[derive(Debug)] +struct SamplingJoinPlanner; + +impl RelationPlanner for SamplingJoinPlanner { + fn plan_relation( + &self, + relation: TableFactor, + context: &mut dyn RelationPlannerContext, + ) -> Result { + match relation { + TableFactor::NestedJoin { + table_with_joins, + alias, + .. + } if table_with_joins.joins.len() == 1 => { + // Use context.plan() to recursively plan both sides + // This ensures other planners (like NumbersPlanner) can handle them + let left = context.plan(table_with_joins.relation.clone())?; + let right = context.plan(table_with_joins.joins[0].relation.clone())?; + + // Sample each table to 2 rows + let left_sampled = + LogicalPlanBuilder::from(left).limit(0, Some(2))?.build()?; + + let right_sampled = + LogicalPlanBuilder::from(right).limit(0, Some(2))?.build()?; + + // Cross join: 2 rows × 2 rows = 4 rows (instead of 3×3=9 without sampling) + let plan = LogicalPlanBuilder::from(left_sampled) + .cross_join(right_sampled)? + .build()?; + + Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) + } + other => Ok(RelationPlanning::Original(other)), + } + } +} + +/// Example planner that never handles any relation and always delegates by +/// returning `RelationPlanning::Original`. +#[derive(Debug)] +struct PassThroughPlanner; + +impl RelationPlanner for PassThroughPlanner { + fn plan_relation( + &self, + relation: TableFactor, + _context: &mut dyn RelationPlannerContext, + ) -> Result { + // Never handles anything - always delegates + Ok(RelationPlanning::Original(relation)) + } +} + +/// Example planner that shows how planners can block specific constructs and +/// surface custom error messages by rejecting `UNNEST` relations (here framed +/// as a mock premium feature check). +#[derive(Debug)] +struct PremiumFeaturePlanner; + +impl RelationPlanner for PremiumFeaturePlanner { + fn plan_relation( + &self, + relation: TableFactor, + _context: &mut dyn RelationPlannerContext, + ) -> Result { + match relation { + TableFactor::UNNEST { .. } => Err(datafusion_common::DataFusionError::Plan( + "UNNEST is a premium feature! Please upgrade to DataFusion Pro™ \ + to unlock advanced array operations." + .to_string(), + )), + other => Ok(RelationPlanning::Original(other)), + } + } +} + +// ============================================================================ +// Test Helpers - SQL Execution +// ============================================================================ + +/// Execute SQL and return results with better error messages. +async fn execute_sql(ctx: &SessionContext, sql: &str) -> Result> { + let df = ctx.sql(sql).await?; + df.collect().await +} + +/// Execute SQL and convert to string format for snapshot comparison. 
+async fn execute_sql_to_string(ctx: &SessionContext, sql: &str) -> String { + let batches = execute_sql(ctx, sql) + .await + .expect("SQL execution should succeed"); + batches_to_string(&batches) +} + +// ============================================================================ +// Test Helpers - Context Builders +// ============================================================================ + +/// Create a SessionContext with a catalog table containing Int64 and Utf8 columns. +/// +/// Creates a table with the specified name and sample data for fallback/integration tests. +fn create_context_with_catalog_table( + table_name: &str, + id_values: Vec, + name_values: Vec<&str>, +) -> SessionContext { + let ctx = SessionContext::new(); + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int64Array::from(id_values)), + Arc::new(StringArray::from(name_values)), + ], + ) + .unwrap(); + + let table = MemTable::try_new(schema, vec![vec![batch]]).unwrap(); + ctx.register_table(table_name, Arc::new(table)).unwrap(); + + ctx +} + +/// Create a SessionContext with a simple single-column Int64 table. +/// +/// Useful for basic tests that need a real catalog table. +fn create_context_with_simple_table( + table_name: &str, + values: Vec, +) -> SessionContext { + let ctx = SessionContext::new(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + DataType::Int64, + true, + )])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(Int64Array::from(values))]) + .unwrap(); + + let table = MemTable::try_new(schema, vec![vec![batch]]).unwrap(); + ctx.register_table(table_name, Arc::new(table)).unwrap(); + + ctx +} + +// ============================================================================ +// TESTS: Ordered from Basic to Complex +// ============================================================================ + +/// Comprehensive test suite for RelationPlanner extension point. +/// Tests are ordered from simplest smoke test to most complex scenarios. +#[cfg(test)] +mod tests { + use super::*; + + /// Small extension trait to make test setup read fluently. + trait TestSessionExt { + fn with_planner(self, planner: P) -> Self; + } + + impl TestSessionExt for SessionContext { + fn with_planner(self, planner: P) -> Self { + self.register_relation_planner(Arc::new(planner)).unwrap(); + self + } + } + + /// Session context with only the `NumbersPlanner` registered. + fn ctx_with_numbers() -> SessionContext { + SessionContext::new().with_planner(NumbersPlanner) + } + + /// Session context with virtual tables (`numbers`, `colors`) and the + /// `SamplingJoinPlanner` registered for nested joins. + fn ctx_with_virtual_tables_and_sampling() -> SessionContext { + SessionContext::new() + .with_planner(NumbersPlanner) + .with_planner(ColorsPlanner) + .with_planner(SamplingJoinPlanner) + } + + // Basic smoke test: virtual table can be queried like a regular table. + #[tokio::test] + async fn virtual_table_basic_select() { + let ctx = ctx_with_numbers(); + + let result = execute_sql_to_string(&ctx, "SELECT * FROM numbers").await; + + assert_snapshot!(result, @r" + +--------+ + | number | + +--------+ + | 1 | + | 2 | + | 3 | + +--------+ + "); + } + + // Virtual table supports standard SQL operations (projection, filter, aggregation). 
+ #[tokio::test] + async fn virtual_table_filters_and_aggregation() { + let ctx = ctx_with_numbers(); + + let filtered = execute_sql_to_string( + &ctx, + "SELECT number * 10 AS scaled FROM numbers WHERE number > 1", + ) + .await; + + assert_snapshot!(filtered, @r" + +--------+ + | scaled | + +--------+ + | 20 | + | 30 | + +--------+ + "); + + let aggregated = execute_sql_to_string( + &ctx, + "SELECT COUNT(*) as count, SUM(number) as total, AVG(number) as average \ + FROM numbers", + ) + .await; + + assert_snapshot!(aggregated, @r" + +-------+-------+---------+ + | count | total | average | + +-------+-------+---------+ + | 3 | 6 | 2.0 | + +-------+-------+---------+ + "); + } + + // Multiple planners can coexist and each handles its own virtual table. + #[tokio::test] + async fn multiple_planners_virtual_tables() { + let ctx = SessionContext::new() + .with_planner(NumbersPlanner) + .with_planner(ColorsPlanner); + + let result1 = execute_sql_to_string(&ctx, "SELECT * FROM numbers").await; + assert_snapshot!(result1, @r" + +--------+ + | number | + +--------+ + | 1 | + | 2 | + | 3 | + +--------+ + "); + + let result2 = execute_sql_to_string(&ctx, "SELECT * FROM colors").await; + assert_snapshot!(result2, @r" + +-------+ + | color | + +-------+ + | red | + | green | + | blue | + +-------+ + "); + } + + // Last registered planner for the same table name takes precedence (LIFO). + #[tokio::test] + async fn lifo_precedence_last_planner_wins() { + let ctx = SessionContext::new() + .with_planner(AlternativeNumbersPlanner) + .with_planner(NumbersPlanner); + + let result = execute_sql_to_string(&ctx, "SELECT * FROM numbers").await; + + // CustomValuesPlanner registered last, should win (returns 1,2,3 not 100,200) + assert_snapshot!(result, @r" + +--------+ + | number | + +--------+ + | 1 | + | 2 | + | 3 | + +--------+ + "); + } + + // Pass-through planner delegates to the catalog without changing behavior. + #[tokio::test] + async fn delegation_pass_through_to_catalog() { + let ctx = create_context_with_simple_table("real_table", vec![42]) + .with_planner(PassThroughPlanner); + + let result = execute_sql_to_string(&ctx, "SELECT * FROM real_table").await; + + assert_snapshot!(result, @r" + +-------+ + | value | + +-------+ + | 42 | + +-------+ + "); + } + + // Catalog is used when no planner claims the relation. + #[tokio::test] + async fn catalog_fallback_when_no_planner() { + let ctx = + create_context_with_catalog_table("users", vec![1, 2], vec!["Alice", "Bob"]) + .with_planner(NumbersPlanner); + + let result = execute_sql_to_string(&ctx, "SELECT * FROM users ORDER BY id").await; + + assert_snapshot!(result, @r" + +----+-------+ + | id | name | + +----+-------+ + | 1 | Alice | + | 2 | Bob | + +----+-------+ + "); + } + + // Planners can block specific constructs and surface custom error messages. 
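+    // An Err returned from plan_relation surfaces directly as the planning error,
+    // as the test below demonstrates.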
+ #[tokio::test] + async fn error_handling_premium_feature_blocking() { + // Verify UNNEST works without planner + let ctx_without_planner = SessionContext::new(); + let result = + execute_sql(&ctx_without_planner, "SELECT * FROM UNNEST(ARRAY[1, 2, 3])") + .await + .expect("UNNEST should work by default"); + assert_eq!(result.len(), 1); + + // Same query with blocking planner registered + let ctx = SessionContext::new().with_planner(PremiumFeaturePlanner); + + // Verify UNNEST is now rejected + let error = execute_sql(&ctx, "SELECT * FROM UNNEST(ARRAY[1, 2, 3])") + .await + .expect_err("UNNEST should be rejected"); + + let error_msg = error.to_string(); + assert!( + error_msg.contains("premium feature") && error_msg.contains("DataFusion Pro"), + "Expected custom rejection message, got: {error_msg}" + ); + } + + // SamplingJoinPlanner recursively calls `context.plan()` on both sides of a + // nested join before sampling, exercising recursive relation planning. + #[tokio::test] + async fn recursive_planning_sampling_join() { + let ctx = ctx_with_virtual_tables_and_sampling(); + + let result = + execute_sql_to_string(&ctx, "SELECT * FROM (numbers JOIN colors ON true)") + .await; + + // SamplingJoinPlanner limits each side to 2 rows: 2×2=4 (not 3×3=9) + assert_snapshot!(result, @r" + +--------+-------+ + | number | color | + +--------+-------+ + | 1 | red | + | 1 | green | + | 2 | red | + | 2 | green | + +--------+-------+ + "); + } +} diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 62e8ab18b9be0..e7bd2241398ad 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -23,13 +23,13 @@ use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::mem::{size_of, size_of_val}; use std::sync::{ - atomic::{AtomicBool, Ordering}, Arc, + atomic::{AtomicBool, Ordering}, }; use arrow::array::{ - record_batch, types::UInt64Type, Array, AsArray, Int32Array, PrimitiveArray, - StringArray, StructArray, UInt64Array, + Array, AsArray, Int32Array, PrimitiveArray, StringArray, StructArray, UInt64Array, + record_batch, types::UInt64Type, }; use arrow::datatypes::{Fields, Schema}; use arrow_schema::FieldRef; @@ -56,8 +56,8 @@ use datafusion_common::{cast::as_primitive_array, exec_err}; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{ - col, create_udaf, function::AccumulatorArgs, AggregateUDFImpl, Expr, - GroupsAccumulator, LogicalPlanBuilder, SimpleAggregateUDF, WindowFunctionDefinition, + AggregateUDFImpl, Expr, GroupsAccumulator, LogicalPlanBuilder, SimpleAggregateUDF, + WindowFunctionDefinition, col, create_udaf, function::AccumulatorArgs, }; use datafusion_functions_aggregate::average::AvgAccumulator; @@ -69,7 +69,7 @@ async fn test_setup() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +-------+----------------------------+ | value | time | +-------+----------------------------+ @@ -79,7 +79,7 @@ async fn test_setup() { | 5.0 | 1970-01-01T00:00:00.000005 | | 5.0 | 1970-01-01T00:00:00.000005 | +-------+----------------------------+ - "###); + "); } /// Basic user defined aggregate @@ -91,13 +91,13 @@ async fn test_udaf() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + 
insta::assert_snapshot!(batches_to_string(&actual), @r" +----------------------------+ | time_sum(t.time) | +----------------------------+ | 1970-01-01T00:00:00.000019 | +----------------------------+ - "###); + "); // normal aggregates call update_batch assert!(test_state.update_batch()); @@ -112,7 +112,7 @@ async fn test_udaf_as_window() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +----------------------------+ | time_sum | +----------------------------+ @@ -122,7 +122,7 @@ async fn test_udaf_as_window() { | 1970-01-01T00:00:00.000019 | | 1970-01-01T00:00:00.000019 | +----------------------------+ - "###); + "); // aggregate over the entire window function call update_batch assert!(test_state.update_batch()); @@ -137,7 +137,7 @@ async fn test_udaf_as_window_with_frame() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +----------------------------+ | time_sum | +----------------------------+ @@ -147,7 +147,7 @@ async fn test_udaf_as_window_with_frame() { | 1970-01-01T00:00:00.000014 | | 1970-01-01T00:00:00.000010 | +----------------------------+ - "###); + "); // user defined aggregates with window frame should be calling retract batch assert!(test_state.update_batch()); @@ -164,7 +164,10 @@ async fn test_udaf_as_window_with_frame_without_retract_batch() { let sql = "SELECT time_sum(time) OVER(ORDER BY time ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as time_sum from t"; // Note if this query ever does start working let err = execute(&ctx, sql).await.unwrap_err(); - assert_contains!(err.to_string(), "This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented: time_sum(t.time) ORDER BY [t.time ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING"); + assert_contains!( + err.to_string(), + "This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented: time_sum(t.time) ORDER BY [t.time ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING" + ); } /// Basic query for with a udaf returning a structure @@ -175,13 +178,13 @@ async fn test_udaf_returning_struct() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +------------------------------------------------+ | first(t.value,t.time) | +------------------------------------------------+ | {value: 2.0, time: 1970-01-01T00:00:00.000002} | +------------------------------------------------+ - "###); + "); } /// Demonstrate extracting the fields from a structure using a subquery @@ -192,13 +195,13 @@ async fn test_udaf_returning_struct_subquery() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +-----------------+----------------------------+ | sq.first[value] | sq.first[time] | +-----------------+----------------------------+ | 2.0 | 1970-01-01T00:00:00.000002 | +-----------------+----------------------------+ - "###); + "); } #[tokio::test] @@ -212,13 +215,13 @@ async fn test_udaf_shadows_builtin_fn() { // compute with builtin `sum` aggregator let actual = execute(&ctx, sql).await.unwrap(); - 
insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r#" +---------------------------------------+ | sum(arrow_cast(t.time,Utf8("Int64"))) | +---------------------------------------+ | 19000 | +---------------------------------------+ - "###); + "#); // Register `TimeSum` with name `sum`. This will shadow the builtin one TimeSum::register(&mut ctx, test_state.clone(), "sum"); @@ -226,13 +229,13 @@ async fn test_udaf_shadows_builtin_fn() { let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +----------------------------+ | sum(t.time) | +----------------------------+ | 1970-01-01T00:00:00.000019 | +----------------------------+ - "###); + "); } async fn execute(ctx: &SessionContext, sql: &str) -> Result> { @@ -272,13 +275,13 @@ async fn simple_udaf() -> Result<()> { let result = ctx.sql("SELECT MY_AVG(a) FROM t").await?.collect().await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +-------------+ | my_avg(t.a) | +-------------+ | 3.0 | +-------------+ - "###); + "); Ok(()) } @@ -329,9 +332,10 @@ async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> { // doesn't work as it was registered as non lowercase let err = ctx.sql("SELECT MY_AVG(i) FROM t").await.unwrap_err(); - assert!(err - .to_string() - .contains("Error during planning: Invalid function \'my_avg\'")); + assert!( + err.to_string() + .contains("Error during planning: Invalid function \'my_avg\'") + ); // Can call it if you put quotes let result = ctx @@ -340,13 +344,13 @@ async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +-------------+ | MY_AVG(t.i) | +-------------+ | 1.0 | +-------------+ - "###); + "); Ok(()) } @@ -372,13 +376,13 @@ async fn test_user_defined_functions_with_alias() -> Result<()> { let result = plan_and_collect(&ctx, "SELECT dummy(i) FROM t").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +------------+ | dummy(t.i) | +------------+ | 1.0 | +------------+ - "###); + "); let alias_result = plan_and_collect(&ctx, "SELECT dummy_alias(i) FROM t").await?; @@ -449,13 +453,13 @@ async fn test_parameterized_aggregate_udf() -> Result<()> { let actual = DataFrame::new(ctx.state(), plan).collect().await?; - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +------+---+---+ | text | a | b | +------+---+---+ | foo | 1 | 2 | +------+---+---+ - "###); + "); ctx.deregister_table("t")?; Ok(()) @@ -569,6 +573,7 @@ impl TimeSum { Self { sum: 0, test_state } } + #[expect(clippy::needless_pass_by_value)] fn register(ctx: &mut SessionContext, test_state: Arc, name: &str) { let timestamp_type = DataType::Timestamp(TimeUnit::Nanosecond, None); let input_type = vec![timestamp_type.clone()]; @@ -760,11 +765,11 @@ impl Accumulator for FirstSelector { // Update the actual values for (value, time) in v.iter().zip(t.iter()) { - if let (Some(time), Some(value)) = (time, value) { - if time < self.time { - self.value = value; - self.time = time; - } + if let (Some(time), Some(value)) = (time, value) + && time < self.time + { + self.value = value; + self.time = time; } 
} diff --git a/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs new file mode 100644 index 0000000000000..168d81fc6b44c --- /dev/null +++ b/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::{Int32Array, RecordBatch, StringArray}; +use arrow::datatypes::{DataType, Field, Schema}; +use async_trait::async_trait; +use datafusion::prelude::*; +use datafusion_common::{Result, assert_batches_eq}; +use datafusion_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl}; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +}; + +// This test checks the case where batch_size doesn't evenly divide +// the number of rows. +#[tokio::test] +async fn test_async_udf_with_non_modular_batch_size() -> Result<()> { + let num_rows = 3; + let batch_size = 2; + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("prompt", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from((0..num_rows).collect::>())), + Arc::new(StringArray::from( + (0..num_rows) + .map(|i| format!("prompt{i}")) + .collect::>(), + )), + ], + )?; + + let ctx = SessionContext::new(); + ctx.register_batch("test_table", batch)?; + + ctx.register_udf( + AsyncScalarUDF::new(Arc::new(TestAsyncUDFImpl::new(batch_size))) + .into_scalar_udf(), + ); + + let df = ctx + .sql("SELECT id, test_async_udf(prompt) as result FROM test_table") + .await?; + + let result = df.collect().await?; + + assert_batches_eq!( + &[ + "+----+---------+", + "| id | result |", + "+----+---------+", + "| 0 | prompt0 |", + "| 1 | prompt1 |", + "| 2 | prompt2 |", + "+----+---------+" + ], + &result + ); + + Ok(()) +} + +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +struct TestAsyncUDFImpl { + batch_size: usize, + signature: Signature, +} + +impl TestAsyncUDFImpl { + fn new(batch_size: usize) -> Self { + Self { + batch_size, + signature: Signature::exact(vec![DataType::Utf8], Volatility::Volatile), + } + } +} + +impl ScalarUDFImpl for TestAsyncUDFImpl { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "test_async_udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Utf8) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + panic!("Call invoke_async_with_args instead") + } +} + +#[async_trait] +impl AsyncScalarUDFImpl for TestAsyncUDFImpl { + fn ideal_batch_size(&self) -> Option { + 
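+        // Re-chunking input to this size means that, with three input rows and a
+        // batch size of two, the UDF is expected to see one full chunk and one
+        // partial chunk — the non-modular case this test targets.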
Some(self.batch_size) + } + async fn invoke_async_with_args( + &self, + args: ScalarFunctionArgs, + ) -> Result { + let arg1 = &args.args[0]; + let results = call_external_service(arg1.clone()).await?; + Ok(results) + } +} + +/// Simulates calling an async external service +async fn call_external_service(arg1: ColumnarValue) -> Result { + Ok(arg1) +} diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index ffe0ba021edb3..d53e076739608 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -70,7 +70,7 @@ use arrow::{ use datafusion::execution::session_state::SessionStateBuilder; use datafusion::{ common::cast::as_int64_array, - common::{arrow_datafusion_err, internal_err, DFSchemaRef}, + common::{DFSchemaRef, arrow_datafusion_err}, error::{DataFusionError, Result}, execution::{ context::{QueryPlanner, SessionState, TaskContext}, @@ -91,10 +91,10 @@ use datafusion::{ }; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::ScalarValue; +use datafusion_common::{ScalarValue, assert_eq_or_internal_err, assert_or_internal_err}; use datafusion_expr::{FetchType, InvariantLevel, Projection, SortExpr}; -use datafusion_optimizer::optimizer::ApplyOrder; use datafusion_optimizer::AnalyzerRule; +use datafusion_optimizer::optimizer::ApplyOrder; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use async_trait::async_trait; @@ -161,7 +161,7 @@ async fn run_and_compare_query(ctx: SessionContext, description: &str) -> Result insta::with_settings!({ description => description, }, { - insta::assert_snapshot!(actual, @r###" + insta::assert_snapshot!(actual, @r" +-------------+---------+ | customer_id | revenue | +-------------+---------+ @@ -169,7 +169,7 @@ async fn run_and_compare_query(ctx: SessionContext, description: &str) -> Result | jorge | 200 | | andy | 150 | +-------------+---------+ - "###); + "); }); } @@ -188,13 +188,13 @@ async fn run_and_compare_query_with_analyzer_rule( insta::with_settings!({ description => description, }, { - insta::assert_snapshot!(actual, @r###" + insta::assert_snapshot!(actual, @r" +------------+--------------------------+ | UInt64(42) | arrow_typeof(UInt64(42)) | +------------+--------------------------+ | 42 | UInt64 | +------------+--------------------------+ - "###); + "); }); Ok(()) @@ -212,7 +212,7 @@ async fn run_and_compare_query_with_auto_schemas( insta::with_settings!({ description => description, }, { - insta::assert_snapshot!(actual, @r###" + insta::assert_snapshot!(actual, @r" +----------+----------+ | column_1 | column_2 | +----------+----------+ @@ -220,7 +220,7 @@ async fn run_and_compare_query_with_auto_schemas( | jorge | 200 | | andy | 150 | +----------+----------+ - "###); + "); }); Ok(()) @@ -433,21 +433,21 @@ impl OptimizerRule for OptimizerMakeExtensionNodeInvalid { plan: LogicalPlan, _config: &dyn OptimizerConfig, ) -> Result, DataFusionError> { - if let LogicalPlan::Extension(Extension { node }) = &plan { - if let Some(prev) = node.as_any().downcast_ref::() { - return Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(TopKPlanNode { - k: prev.k, - input: prev.input.clone(), - expr: prev.expr.clone(), - // In a real use case, this rewriter could have change the number of inputs, etc - invariant_mock: Some(InvariantMock { - should_fail_invariant: true, - kind: 
InvariantLevel::Always, - }), + if let LogicalPlan::Extension(Extension { node }) = &plan + && let Some(prev) = node.as_any().downcast_ref::() + { + return Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(TopKPlanNode { + k: prev.k, + input: prev.input.clone(), + expr: prev.expr.clone(), + // In a real use case, this rewriter could have change the number of inputs, etc + invariant_mock: Some(InvariantMock { + should_fail_invariant: true, + kind: InvariantLevel::Always, }), - }))); - } + }), + }))); }; Ok(Transformed::no(plan)) @@ -515,23 +515,18 @@ impl OptimizerRule for TopKOptimizerRule { return Ok(Transformed::no(plan)); }; - if let LogicalPlan::Sort(Sort { - ref expr, - ref input, - .. - }) = limit.input.as_ref() + if let LogicalPlan::Sort(Sort { expr, input, .. }) = limit.input.as_ref() + && expr.len() == 1 { - if expr.len() == 1 { - // we found a sort with a single sort expr, replace with a a TopK - return Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(TopKPlanNode { - k: fetch, - input: input.as_ref().clone(), - expr: expr[0].clone(), - invariant_mock: self.invariant_mock.clone(), - }), - }))); - } + // we found a sort with a single sort expr, replace with a a TopK + return Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(TopKPlanNode { + k: fetch, + input: input.as_ref().clone(), + expr: expr[0].clone(), + invariant_mock: self.invariant_mock.clone(), + }), + }))); } Ok(Transformed::no(plan)) @@ -585,9 +580,10 @@ impl UserDefinedLogicalNodeCore for TopKPlanNode { kind, }) = self.invariant_mock.clone() { - if should_fail_invariant && check == kind { - return internal_err!("node fails check, such as improper inputs"); - } + assert_or_internal_err!( + !(should_fail_invariant && check == kind), + "node fails check, such as improper inputs" + ); } Ok(()) } @@ -733,9 +729,11 @@ impl ExecutionPlan for TopKExec { partition: usize, context: Arc, ) -> Result { - if 0 != partition { - return internal_err!("TopKExec invalid partition {partition}"); - } + assert_eq_or_internal_err!( + partition, + 0, + "TopKExec invalid partition {partition}" + ); Ok(Box::pin(TopKReader { input: self.input.execute(partition, context)?, diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 3ca8f846aa5e5..b86cd94a8a9b7 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -20,11 +20,11 @@ use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use arrow::array::{as_string_array, create_array, record_batch, Int8Array, UInt64Array}; use arrow::array::{ - builder::BooleanBuilder, cast::AsArray, Array, ArrayRef, Float32Array, Float64Array, - Int32Array, RecordBatch, StringArray, + Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray, + builder::BooleanBuilder, cast::AsArray, }; +use arrow::array::{Int8Array, UInt64Array, as_string_array, create_array, record_batch}; use arrow::compute::kernels::numeric::add; use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::extension::{Bool8, CanonicalExtensionType, ExtensionType}; @@ -38,15 +38,17 @@ use datafusion_common::metadata::FieldMetadata; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::utils::take_function_args; use datafusion_common::{ - assert_batches_eq, assert_batches_sorted_eq, 
assert_contains, exec_datafusion_err, - exec_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue, + DFSchema, DataFusionError, Result, ScalarValue, assert_batches_eq, + assert_batches_sorted_eq, assert_contains, exec_datafusion_err, exec_err, + not_impl_err, plan_err, }; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, - LogicalPlanBuilder, OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs, - ScalarUDF, ScalarUDFImpl, Signature, Volatility, + Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, LogicalPlanBuilder, + OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, + Signature, Volatility, lit_with_metadata, }; +use datafusion_expr_common::signature::TypeSignature; use datafusion_functions_nested::range::range_udf; use parking_lot::Mutex; use regex::Regex; @@ -63,13 +65,13 @@ async fn csv_query_custom_udf_with_cast() -> Result<()> { let sql = "SELECT avg(custom_sqrt(c11)) FROM aggregate_test_100"; let actual = plan_and_collect(&ctx, sql).await?; - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +------------------------------------------+ | avg(custom_sqrt(aggregate_test_100.c11)) | +------------------------------------------+ | 0.6584408483418835 | +------------------------------------------+ - "###); + "); Ok(()) } @@ -82,13 +84,13 @@ async fn csv_query_avg_sqrt() -> Result<()> { let sql = "SELECT avg(custom_sqrt(c12)) FROM aggregate_test_100"; let actual = plan_and_collect(&ctx, sql).await?; - insta::assert_snapshot!(batches_to_string(&actual), @r###" + insta::assert_snapshot!(batches_to_string(&actual), @r" +------------------------------------------+ | avg(custom_sqrt(aggregate_test_100.c12)) | +------------------------------------------+ | 0.6706002946036459 | +------------------------------------------+ - "###); + "); Ok(()) } @@ -153,7 +155,7 @@ async fn scalar_udf() -> Result<()> { let result = DataFrame::new(ctx.state(), plan).collect().await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +-----+-----+-----------------+ | a | b | my_add(t.a,t.b) | +-----+-----+-----------------+ @@ -162,7 +164,7 @@ async fn scalar_udf() -> Result<()> { | 10 | 12 | 22 | | 100 | 120 | 220 | +-----+-----+-----------------+ - "###); + "); let batch = &result[0]; let a = as_int32_array(batch.column(0))?; @@ -279,7 +281,7 @@ async fn scalar_udf_zero_params() -> Result<()> { ctx.register_udf(ScalarUDF::from(get_100_udf)); let result = plan_and_collect(&ctx, "select get_100() a from t").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +-----+ | a | +-----+ @@ -288,22 +290,22 @@ async fn scalar_udf_zero_params() -> Result<()> { | 100 | | 100 | +-----+ - "###); + "); let result = plan_and_collect(&ctx, "select get_100() a").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +-----+ | a | +-----+ | 100 | +-----+ - "###); + "); let result = plan_and_collect(&ctx, "select get_100() from t where a=999").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" ++ ++ - "###); + "); Ok(()) } @@ -330,13 +332,13 @@ async fn 
scalar_udf_override_built_in_scalar_function() -> Result<()> { // Make sure that the UDF is used instead of the built-in function let result = plan_and_collect(&ctx, "select abs(a) a from t").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +---+ | a | +---+ | 1 | +---+ - "###); + "); Ok(()) } @@ -425,20 +427,21 @@ async fn case_sensitive_identifiers_user_defined_functions() -> Result<()> { let err = plan_and_collect(&ctx, "SELECT MY_FUNC(i) FROM t") .await .unwrap_err(); - assert!(err - .to_string() - .contains("Error during planning: Invalid function \'my_func\'")); + assert!( + err.to_string() + .contains("Error during planning: Invalid function \'my_func\'") + ); // Can call it if you put quotes let result = plan_and_collect(&ctx, "SELECT \"MY_FUNC\"(i) FROM t").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +--------------+ | MY_FUNC(t.i) | +--------------+ | 1 | +--------------+ - "###); + "); Ok(()) } @@ -469,13 +472,13 @@ async fn test_user_defined_functions_with_alias() -> Result<()> { ctx.register_udf(udf); let result = plan_and_collect(&ctx, "SELECT dummy(i) FROM t").await?; - insta::assert_snapshot!(batches_to_string(&result), @r###" + insta::assert_snapshot!(batches_to_string(&result), @r" +------------+ | dummy(t.i) | +------------+ | 1 | +------------+ - "###); + "); let alias_result = plan_and_collect(&ctx, "SELECT dummy_alias(i) FROM t").await?; insta::assert_snapshot!(batches_to_string(&alias_result), @r" @@ -945,6 +948,7 @@ struct ScalarFunctionWrapper { expr: Expr, signature: Signature, return_type: DataType, + defaults: Vec>, } impl ScalarUDFImpl for ScalarFunctionWrapper { @@ -973,7 +977,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { args: Vec, _info: &dyn SimplifyInfo, ) -> Result { - let replacement = Self::replacement(&self.expr, &args)?; + let replacement = Self::replacement(&self.expr, &args, &self.defaults)?; Ok(ExprSimplifyResult::Simplified(replacement)) } @@ -981,7 +985,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { impl ScalarFunctionWrapper { // replaces placeholders with actual arguments - fn replacement(expr: &Expr, args: &[Expr]) -> Result { + fn replacement( + expr: &Expr, + args: &[Expr], + defaults: &[Option], + ) -> Result { let result = expr.clone().transform(|e| { let r = match e { Expr::Placeholder(placeholder) => { @@ -989,11 +997,19 @@ impl ScalarFunctionWrapper { Self::parse_placeholder_identifier(&placeholder.id)?; if placeholder_position < args.len() { Transformed::yes(args[placeholder_position].clone()) - } else { + } else if placeholder_position >= defaults.len() { exec_err!( - "Function argument {} not provided, argument missing!", + "Invalid placeholder, out of range: {}", placeholder.id )? 
+ } else { + match defaults[placeholder_position] { + Some(ref default) => Transformed::yes(default.clone()), + None => exec_err!( + "Function argument {} not provided, argument missing!", + placeholder.id + )?, + } } } _ => Transformed::no(e), @@ -1021,6 +1037,32 @@ impl TryFrom for ScalarFunctionWrapper { type Error = DataFusionError; fn try_from(definition: CreateFunction) -> std::result::Result { + let args = definition.args.unwrap_or_default(); + let defaults: Vec> = + args.iter().map(|a| a.default_expr.clone()).collect(); + let signature: Signature = match defaults.iter().position(|v| v.is_some()) { + Some(pos) => { + let mut type_signatures: Vec = vec![]; + // Generate all valid signatures + for n in pos..defaults.len() + 1 { + if n == 0 { + type_signatures.push(TypeSignature::Nullary) + } else { + type_signatures.push(TypeSignature::Exact( + args.iter().take(n).map(|a| a.data_type.clone()).collect(), + )) + } + } + Signature::one_of( + type_signatures, + definition.params.behavior.unwrap_or(Volatility::Volatile), + ) + } + None => Signature::exact( + args.iter().map(|a| a.data_type.clone()).collect(), + definition.params.behavior.unwrap_or(Volatility::Volatile), + ), + }; Ok(Self { name: definition.name, expr: definition @@ -1030,15 +1072,8 @@ impl TryFrom for ScalarFunctionWrapper { return_type: definition .return_type .expect("Return type has to be defined!"), - signature: Signature::exact( - definition - .args - .unwrap_or_default() - .into_iter() - .map(|a| a.data_type) - .collect(), - definition.params.behavior.unwrap_or(Volatility::Volatile), - ), + signature, + defaults, }) } } @@ -1061,10 +1096,11 @@ async fn create_scalar_function_from_sql_statement() -> Result<()> { // Create the `better_add` function dynamically via CREATE FUNCTION statement assert!(ctx.sql(sql).await.is_ok()); // try to `drop function` when sql options have allow ddl disabled - assert!(ctx - .sql_with_options("drop function better_add", options) - .await - .is_err()); + assert!( + ctx.sql_with_options("drop function better_add", options) + .await + .is_err() + ); let result = ctx .sql("select better_add(2.0, 2.0)") @@ -1109,6 +1145,180 @@ async fn create_scalar_function_from_sql_statement() -> Result<()> { "#; assert!(ctx.sql(bad_definition_sql).await.is_err()); + // FIXME: Definitions with invalid placeholders are allowed, fail at runtime + let bad_expression_sql = r#" + CREATE FUNCTION better_add(DOUBLE, DOUBLE) + RETURNS DOUBLE + RETURN $1 + $3 + "#; + assert!(ctx.sql(bad_expression_sql).await.is_ok()); + + let err = ctx + .sql("select better_add(2.0, 2.0)") + .await? + .collect() + .await + .expect_err("unknown placeholder"); + let expected = "Optimizer rule 'simplify_expressions' failed\ncaused by\nExecution error: Invalid placeholder, out of range: $3"; + assert!(expected.starts_with(&err.strip_backtrace())); + + Ok(()) +} + +#[tokio::test] +async fn create_scalar_function_from_sql_statement_named_arguments() -> Result<()> { + let function_factory = Arc::new(CustomFunctionFactory::default()); + let ctx = SessionContext::new().with_function_factory(function_factory.clone()); + + let sql = r#" + CREATE FUNCTION better_add(a DOUBLE, b DOUBLE) + RETURNS DOUBLE + RETURN $a + $b + "#; + + assert!(ctx.sql(sql).await.is_ok()); + + let result = ctx + .sql("select better_add(2.0, 2.0)") + .await? 
+ .collect() + .await?; + + assert_batches_eq!( + &[ + "+-----------------------------------+", + "| better_add(Float64(2),Float64(2)) |", + "+-----------------------------------+", + "| 4.0 |", + "+-----------------------------------+", + ], + &result + ); + + // cannot mix named and positional style + let bad_expression_sql = r#" + CREATE FUNCTION bad_expression_fun(DOUBLE, b DOUBLE) + RETURNS DOUBLE + RETURN $1 + $b + "#; + let err = ctx + .sql(bad_expression_sql) + .await + .expect_err("cannot mix named and positional style"); + let expected = "Error during planning: All function arguments must use either named or positional style."; + assert!(expected.starts_with(&err.strip_backtrace())); + + Ok(()) +} + +#[tokio::test] +async fn create_scalar_function_from_sql_statement_default_arguments() -> Result<()> { + let function_factory = Arc::new(CustomFunctionFactory::default()); + let ctx = SessionContext::new().with_function_factory(function_factory.clone()); + + let sql = r#" + CREATE FUNCTION better_add(a DOUBLE = 2.0, b DOUBLE = 2.0) + RETURNS DOUBLE + RETURN $a + $b + "#; + + assert!(ctx.sql(sql).await.is_ok()); + + // Check all function arity supported + let result = ctx.sql("select better_add()").await?.collect().await?; + + assert_batches_eq!( + &[ + "+--------------+", + "| better_add() |", + "+--------------+", + "| 4.0 |", + "+--------------+", + ], + &result + ); + + let result = ctx.sql("select better_add(2.0)").await?.collect().await?; + + assert_batches_eq!( + &[ + "+------------------------+", + "| better_add(Float64(2)) |", + "+------------------------+", + "| 4.0 |", + "+------------------------+", + ], + &result + ); + + let result = ctx + .sql("select better_add(2.0, 2.0)") + .await? + .collect() + .await?; + + assert_batches_eq!( + &[ + "+-----------------------------------+", + "| better_add(Float64(2),Float64(2)) |", + "+-----------------------------------+", + "| 4.0 |", + "+-----------------------------------+", + ], + &result + ); + + assert!(ctx.sql("select better_add(2.0, 2.0, 2.0)").await.is_err()); + assert!(ctx.sql("drop function better_add").await.is_ok()); + + // works with positional style + let sql = r#" + CREATE FUNCTION better_add(DOUBLE, DOUBLE = 2.0) + RETURNS DOUBLE + RETURN $1 + $2 + "#; + assert!(ctx.sql(sql).await.is_ok()); + + assert!(ctx.sql("select better_add()").await.is_err()); + let result = ctx.sql("select better_add(2.0)").await?.collect().await?; + assert_batches_eq!( + &[ + "+------------------------+", + "| better_add(Float64(2)) |", + "+------------------------+", + "| 4.0 |", + "+------------------------+", + ], + &result + ); + + // non-default argument cannot follow default argument + let bad_expression_sql = r#" + CREATE FUNCTION bad_expression_fun(a DOUBLE = 2.0, b DOUBLE) + RETURNS DOUBLE + RETURN $a + $b + "#; + let err = ctx + .sql(bad_expression_sql) + .await + .expect_err("non-default argument cannot follow default argument"); + let expected = + "Error during planning: Non-default arguments cannot follow default arguments."; + assert!(expected.starts_with(&err.strip_backtrace())); + + // FIXME: The `DEFAULT` syntax does not work with positional params + let bad_expression_sql = r#" + CREATE FUNCTION bad_expression_fun(DOUBLE, DOUBLE DEFAULT 2.0) + RETURNS DOUBLE + RETURN $1 + $2 + "#; + let err = ctx + .sql(bad_expression_sql) + .await + .expect_err("sqlparser error"); + let expected = + "SQL error: ParserError(\"Expected: ), found: 2.0 at Line: 2, Column: 63\")"; + assert!(expected.starts_with(&err.strip_backtrace())); 
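+    // In both the named and positional cases above, the accepted arities follow from
+    // ScalarFunctionWrapper::try_from, which expands default arguments into a
+    // Signature::one_of covering every argument count from the first defaulted
+    // position up to the full argument list.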
Ok(()) } diff --git a/datafusion/core/tests/user_defined/user_defined_table_functions.rs b/datafusion/core/tests/user_defined/user_defined_table_functions.rs index 2c6611f382cea..8be8609c62480 100644 --- a/datafusion/core/tests/user_defined/user_defined_table_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_table_functions.rs @@ -21,17 +21,17 @@ use std::path::Path; use std::sync::Arc; use arrow::array::Int64Array; -use arrow::csv::reader::Format; use arrow::csv::ReaderBuilder; +use arrow::csv::reader::Format; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::common::test_util::batches_to_string; -use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::TableProvider; +use datafusion::datasource::memory::MemorySourceConfig; use datafusion::error::Result; use datafusion::execution::TaskContext; -use datafusion::physical_plan::{collect, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect}; use datafusion::prelude::SessionContext; use datafusion_catalog::Session; use datafusion_catalog::TableFunctionImpl; @@ -55,7 +55,7 @@ async fn test_simple_read_csv_udtf() -> Result<()> { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&rbs), @r###" + insta::assert_snapshot!(batches_to_string(&rbs), @r" +-------------+-----------+-------------+-------------------------------------------------------------------------------------------------------------+ | n_nationkey | n_name | n_regionkey | n_comment | +-------------+-----------+-------------+-------------------------------------------------------------------------------------------------------------+ @@ -65,7 +65,7 @@ async fn test_simple_read_csv_udtf() -> Result<()> { | 4 | EGYPT | 4 | y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d | | 5 | ETHIOPIA | 0 | ven packages wake quickly. regu | +-------------+-----------+-------------+-------------------------------------------------------------------------------------------------------------+ - "###); + "); // just run, return all rows let rbs = ctx @@ -74,7 +74,7 @@ async fn test_simple_read_csv_udtf() -> Result<()> { .collect() .await?; - insta::assert_snapshot!(batches_to_string(&rbs), @r###" + insta::assert_snapshot!(batches_to_string(&rbs), @r" +-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+ | n_nationkey | n_name | n_regionkey | n_comment | +-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+ @@ -89,7 +89,7 @@ async fn test_simple_read_csv_udtf() -> Result<()> { | 9 | INDONESIA | 2 | slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull | | 10 | IRAN | 4 | efully alongside of the slyly final dependencies. 
| +-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+ - "###); + "); Ok(()) } @@ -205,7 +205,7 @@ impl TableFunctionImpl for SimpleCsvTableFunc { let mut filepath = String::new(); for expr in exprs { match expr { - Expr::Literal(ScalarValue::Utf8(Some(ref path)), _) => { + Expr::Literal(ScalarValue::Utf8(Some(path)), _) => { filepath.clone_from(path); } expr => new_exprs.push(expr.clone()), diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 33607ebc0d2cc..57baf271c5913 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -19,8 +19,8 @@ //! user defined window functions use arrow::array::{ - record_batch, Array, ArrayRef, AsArray, Int64Array, RecordBatch, StringArray, - UInt64Array, + Array, ArrayRef, AsArray, Int64Array, RecordBatch, StringArray, UInt64Array, + record_batch, }; use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::FieldRef; @@ -38,8 +38,8 @@ use datafusion_functions_window_common::{ expr::ExpressionArgs, field::WindowUDFFieldArgs, }; use datafusion_physical_expr::{ - expressions::{col, lit}, PhysicalExpr, + expressions::{col, lit}, }; use std::collections::HashMap; use std::hash::{Hash, Hasher}; @@ -47,8 +47,8 @@ use std::{ any::Any, ops::Range, sync::{ - atomic::{AtomicUsize, Ordering}, Arc, + atomic::{AtomicUsize, Ordering}, }, }; @@ -62,8 +62,7 @@ const UNBOUNDED_WINDOW_QUERY_WITH_ALIAS: &str = "SELECT x, y, val, \ from t ORDER BY x, y"; /// A query with a window function evaluated over a moving window -const BOUNDED_WINDOW_QUERY: &str = - "SELECT x, y, val, \ +const BOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ odd_counter(val) OVER (PARTITION BY x ORDER BY y ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) \ from t ORDER BY x, y"; @@ -75,22 +74,22 @@ async fn test_setup() { let sql = "SELECT * from t order by x, y"; let actual = execute(&ctx, sql).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+ - | x | y | val | - +---+---+-----+ - | 1 | a | 0 | - | 1 | b | 1 | - | 1 | c | 2 | - | 2 | d | 3 | - | 2 | e | 4 | - | 2 | f | 5 | - | 2 | g | 6 | - | 2 | h | 6 | - | 2 | i | 6 | - | 2 | j | 6 | - +---+---+-----+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+ + | x | y | val | + +---+---+-----+ + | 1 | a | 0 | + | 1 | b | 1 | + | 1 | c | 2 | + | 2 | d | 3 | + | 2 | e | 4 | + | 2 | f | 5 | + | 2 | g | 6 | + | 2 | h | 6 | + | 2 | i | 6 | + | 2 | j | 6 | + +---+---+-----+ + "); } /// Basic user defined window function @@ -101,22 +100,22 @@ async fn test_udwf() { let actual = execute(&ctx, UNBOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 1 | - | 1 | b | 1 | 1 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 2 | - | 2 | e | 4 | 2 | - | 2 | f | 5 | 2 | - | 2 | g | 6 | 2 | - | 2 | h | 6 | 2 | - | 2 | i | 6 | 2 | - | 
2 | j | 6 | 2 | - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 1 | + | 1 | b | 1 | 1 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 2 | + | 2 | e | 4 | 2 | + | 2 | f | 5 | 2 | + | 2 | g | 6 | 2 | + | 2 | h | 6 | 2 | + | 2 | i | 6 | 2 | + | 2 | j | 6 | 2 | + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + "); // evaluated on two distinct batches assert_eq!(test_state.evaluate_all_called(), 2); @@ -175,22 +174,22 @@ async fn test_udwf_bounded_window_ignores_frame() { // Since the UDWF doesn't say it needs the window frame, the frame is ignored let actual = execute(&ctx, BOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 1 | - | 1 | b | 1 | 1 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 2 | - | 2 | e | 4 | 2 | - | 2 | f | 5 | 2 | - | 2 | g | 6 | 2 | - | 2 | h | 6 | 2 | - | 2 | i | 6 | 2 | - | 2 | j | 6 | 2 | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 1 | + | 1 | b | 1 | 1 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 2 | + | 2 | e | 4 | 2 | + | 2 | f | 5 | 2 | + | 2 | g | 6 | 2 | + | 2 | h | 6 | 2 | + | 2 | i | 6 | 2 | + | 2 | j | 6 | 2 | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + "); // evaluated on 2 distinct batches (when x=1 and x=2) assert_eq!(test_state.evaluate_called(), 0); @@ -205,22 +204,22 @@ async fn test_udwf_bounded_window() { let actual = execute(&ctx, BOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 1 | - | 1 | b | 1 
| 1 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 1 | - | 2 | e | 4 | 2 | - | 2 | f | 5 | 1 | - | 2 | g | 6 | 1 | - | 2 | h | 6 | 0 | - | 2 | i | 6 | 0 | - | 2 | j | 6 | 0 | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 1 | + | 1 | b | 1 | 1 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 1 | + | 2 | e | 4 | 2 | + | 2 | f | 5 | 1 | + | 2 | g | 6 | 1 | + | 2 | h | 6 | 0 | + | 2 | i | 6 | 0 | + | 2 | j | 6 | 0 | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + "); // Evaluate is called for each input rows assert_eq!(test_state.evaluate_called(), 10); @@ -237,22 +236,22 @@ async fn test_stateful_udwf() { let actual = execute(&ctx, UNBOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 0 | - | 1 | b | 1 | 1 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 1 | - | 2 | e | 4 | 1 | - | 2 | f | 5 | 2 | - | 2 | g | 6 | 2 | - | 2 | h | 6 | 2 | - | 2 | i | 6 | 2 | - | 2 | j | 6 | 2 | - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 0 | + | 1 | b | 1 | 1 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 1 | + | 2 | e | 4 | 1 | + | 2 | f | 5 | 2 | + | 2 | g | 6 | 2 | + | 2 | h | 6 | 2 | + | 2 | i | 6 | 2 | + | 2 | j | 6 | 2 | + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + "); assert_eq!(test_state.evaluate_called(), 10); assert_eq!(test_state.evaluate_all_called(), 0); @@ -268,22 +267,22 @@ async fn test_stateful_udwf_bounded_window() { let actual = execute(&ctx, BOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | - 
+---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 1 | - | 1 | b | 1 | 1 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 1 | - | 2 | e | 4 | 2 | - | 2 | f | 5 | 1 | - | 2 | g | 6 | 1 | - | 2 | h | 6 | 0 | - | 2 | i | 6 | 0 | - | 2 | j | 6 | 0 | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 1 | + | 1 | b | 1 | 1 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 1 | + | 2 | e | 4 | 2 | + | 2 | f | 5 | 1 | + | 2 | g | 6 | 1 | + | 2 | h | 6 | 0 | + | 2 | i | 6 | 0 | + | 2 | j | 6 | 0 | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + "); // Evaluate and update_state is called for each input row assert_eq!(test_state.evaluate_called(), 10); @@ -298,22 +297,22 @@ async fn test_udwf_query_include_rank() { let actual = execute(&ctx, UNBOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 3 | - | 1 | b | 1 | 2 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 7 | - | 2 | e | 4 | 6 | - | 2 | f | 5 | 5 | - | 2 | g | 6 | 4 | - | 2 | h | 6 | 3 | - | 2 | i | 6 | 2 | - | 2 | j | 6 | 1 | - +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 3 | + | 1 | b | 1 | 2 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 7 | + | 2 | e | 4 | 6 | + | 2 | f | 5 | 5 | + | 2 | g | 6 | 4 | + | 2 | h | 6 | 3 | + | 2 | i | 6 | 2 | + | 2 | j | 6 | 1 | + +---+---+-----+-----------------------------------------------------------------------------------------------------------------------+ + "); assert_eq!(test_state.evaluate_called(), 0); assert_eq!(test_state.evaluate_all_called(), 0); @@ -329,22 +328,22 @@ async fn test_udwf_bounded_query_include_rank() { let actual = execute(&ctx, BOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | x | y | val 
| odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 3 | - | 1 | b | 1 | 2 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 7 | - | 2 | e | 4 | 6 | - | 2 | f | 5 | 5 | - | 2 | g | 6 | 4 | - | 2 | h | 6 | 3 | - | 2 | i | 6 | 2 | - | 2 | j | 6 | 1 | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 3 | + | 1 | b | 1 | 2 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 7 | + | 2 | e | 4 | 6 | + | 2 | f | 5 | 5 | + | 2 | g | 6 | 4 | + | 2 | h | 6 | 3 | + | 2 | i | 6 | 2 | + | 2 | j | 6 | 1 | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + "); assert_eq!(test_state.evaluate_called(), 0); assert_eq!(test_state.evaluate_all_called(), 0); @@ -362,22 +361,22 @@ async fn test_udwf_bounded_window_returns_null() { let actual = execute(&ctx, BOUNDED_WINDOW_QUERY).await.unwrap(); - insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - | 1 | a | 0 | 1 | - | 1 | b | 1 | 1 | - | 1 | c | 2 | 1 | - | 2 | d | 3 | 1 | - | 2 | e | 4 | 2 | - | 2 | f | 5 | 1 | - | 2 | g | 6 | 1 | - | 2 | h | 6 | | - | 2 | i | 6 | | - | 2 | j | 6 | | - +---+---+-----+--------------------------------------------------------------------------------------------------------------+ - "###); + insta::assert_snapshot!(batches_to_string(&actual), @r" + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + | 1 | a | 0 | 1 | + | 1 | b | 1 | 1 | + | 1 | c | 2 | 1 | + | 2 | d | 3 | 1 | + | 2 | e | 4 | 2 | + | 2 | f | 5 | 1 | + | 2 | g | 6 | 1 | + | 2 | h | 6 | | + | 2 | i | 6 | | + | 2 | j | 6 | | + +---+---+-----+--------------------------------------------------------------------------------------------------------------+ + "); // Evaluate is called for each input rows assert_eq!(test_state.evaluate_called(), 10); @@ -616,7 +615,9 @@ impl PartitionEvaluator for OddCounter { ranks_in_partition: &[Range], ) -> Result { self.test_state.inc_evaluate_all_with_rank_called(); - println!("evaluate_all_with_rank, values: {num_rows:#?}, ranks_in_partitions: {ranks_in_partition:?}"); + println!( + "evaluate_all_with_rank, values: 
{num_rows:#?}, ranks_in_partitions: {ranks_in_partition:?}" + ); // when evaluating with ranks, just return the inverse rank instead let array: Int64Array = ranks_in_partition .iter() diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs index 3b85640804219..9997d23d4c61f 100644 --- a/datafusion/datasource-arrow/src/file_format.rs +++ b/datafusion/datasource-arrow/src/file_format.rs @@ -20,30 +20,31 @@ //! Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) use std::any::Any; -use std::borrow::Cow; use std::collections::HashMap; use std::fmt::{self, Debug}; +use std::io::{Seek, SeekFrom}; use std::sync::Arc; use arrow::datatypes::{Schema, SchemaRef}; use arrow::error::ArrowError; use arrow::ipc::convert::fb_to_schema; -use arrow::ipc::reader::FileReader; +use arrow::ipc::reader::{FileReader, StreamReader}; use arrow::ipc::writer::IpcWriteOptions; -use arrow::ipc::{root_as_message, CompressionType}; +use arrow::ipc::{CompressionType, root_as_message}; use datafusion_common::error::Result; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - internal_datafusion_err, not_impl_err, DataFusionError, GetExt, Statistics, - DEFAULT_ARROW_EXTENSION, + DEFAULT_ARROW_EXTENSION, DataFusionError, GetExt, Statistics, + internal_datafusion_err, not_impl_err, }; use datafusion_common_runtime::{JoinSet, SpawnedTask}; +use datafusion_datasource::TableSchema; use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; use datafusion_datasource::sink::{DataSink, DataSinkExec}; use datafusion_datasource::write::{ - get_writer_schema, ObjectWriterBuilder, SharedBuffer, + ObjectWriterBuilder, SharedBuffer, get_writer_schema, }; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::dml::InsertOp; @@ -59,9 +60,11 @@ use datafusion_datasource::source::DataSourceExec; use datafusion_datasource::write::demux::DemuxedStreamReceiver; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; use datafusion_session::Session; -use futures::stream::BoxStream; use futures::StreamExt; -use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; +use futures::stream::BoxStream; +use object_store::{ + GetOptions, GetRange, GetResultPayload, ObjectMeta, ObjectStore, path::Path, +}; use tokio::io::AsyncWriteExt; /// Initial writing buffer size. Note this is just a size hint for efficiency. It @@ -71,8 +74,8 @@ const INITIAL_BUFFER_BYTES: usize = 1048576; /// If the buffered Arrow data exceeds this size, it is flushed to object store const BUFFER_FLUSH_BYTES: usize = 1024000; +/// Factory struct used to create [`ArrowFormat`] #[derive(Default, Debug)] -/// Factory struct used to create [ArrowFormat] pub struct ArrowFormatFactory; impl ArrowFormatFactory { @@ -107,7 +110,7 @@ impl GetExt for ArrowFormatFactory { } } -/// Arrow `FileFormat` implementation. +/// Arrow [`FileFormat`] implementation. #[derive(Default, Debug)] pub struct ArrowFormat; @@ -150,12 +153,25 @@ impl FileFormat for ArrowFormat { let schema = match r.payload { #[cfg(not(target_arch = "wasm32"))] GetResultPayload::File(mut file, _) => { - let reader = FileReader::try_new(&mut file, None)?; - reader.schema() - } - GetResultPayload::Stream(stream) => { - infer_schema_from_file_stream(stream).await? 
+ match FileReader::try_new(&mut file, None) { + Ok(reader) => reader.schema(), + Err(file_error) => { + // not in the file format, but FileReader read some bytes + // while trying to parse the file and so we need to rewind + // it to the beginning of the file + file.seek(SeekFrom::Start(0))?; + match StreamReader::try_new(&mut file, None) { + Ok(reader) => reader.schema(), + Err(stream_error) => { + return Err(internal_datafusion_err!( + "Failed to parse Arrow file as either file format or stream format. File format error: {file_error}. Stream format error: {stream_error}" + )); + } + } + } + } } + GetResultPayload::Stream(stream) => infer_stream_schema(stream).await?, }; schemas.push(schema.as_ref().clone()); } @@ -175,10 +191,40 @@ impl FileFormat for ArrowFormat { async fn create_physical_plan( &self, - _state: &dyn Session, + state: &dyn Session, conf: FileScanConfig, ) -> Result> { - let source = Arc::new(ArrowSource::default()); + let object_store = state.runtime_env().object_store(&conf.object_store_url)?; + let object_location = &conf + .file_groups + .first() + .ok_or_else(|| internal_datafusion_err!("No files found in file group"))? + .files() + .first() + .ok_or_else(|| internal_datafusion_err!("No files found in file group"))? + .object_meta + .location; + + let table_schema = TableSchema::new( + Arc::clone(conf.file_schema()), + conf.table_partition_cols().clone(), + ); + + let mut source: Arc = + match is_object_in_arrow_ipc_file_format(object_store, object_location).await + { + Ok(true) => Arc::new(ArrowSource::new_file_source(table_schema)), + Ok(false) => Arc::new(ArrowSource::new_stream_file_source(table_schema)), + Err(e) => Err(e)?, + }; + + // Preserve projection from the original file source + if let Some(projection) = conf.file_source.projection() + && let Some(new_source) = source.try_pushdown_projection(projection)? + { + source = new_source; + } + let config = FileScanConfigBuilder::from(conf) .with_source(source) .build(); @@ -202,12 +248,12 @@ impl FileFormat for ArrowFormat { Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _) } - fn file_source(&self) -> Arc { - Arc::new(ArrowSource::default()) + fn file_source(&self, table_schema: TableSchema) -> Arc { + Arc::new(ArrowSource::new_file_source(table_schema)) } } -/// Implements [`FileSink`] for writing to arrow_ipc files +/// Implements [`FileSink`] for Arrow IPC files struct ArrowFileSink { config: FileSinkConfig, } @@ -344,101 +390,167 @@ impl DataSink for ArrowFileSink { } } +// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. +// See + const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; -/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. -/// See -async fn infer_schema_from_file_stream( +async fn infer_stream_schema( mut stream: BoxStream<'static, object_store::Result>, ) -> Result { - // Expected format: - // - 6 bytes - // - 2 bytes - // - 4 bytes, not present below v0.15.0 - // - 4 bytes - // - // - - // So in first read we need at least all known sized sections, - // which is 6 + 2 + 4 + 4 = 16 bytes. 
- let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; - - // Files should start with these magic bytes - if bytes[0..6] != ARROW_MAGIC { - return Err(ArrowError::ParseError( - "Arrow file does not contain correct header".to_string(), - ))?; - } - - // Since continuation marker bytes added in later versions - let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { - (&bytes[12..16], 16) + // IPC streaming format. + // See https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format + // + // + // + // ... + // + // + // ... + // + // ... + // + // ... + // + // + + // The streaming format is made up of a sequence of encapsulated messages. + // See https://arrow.apache.org/docs/format/Columnar.html#encapsulated-message-format + // + // (added in v0.15.0) + // + // + // + // + // + // The first message is the schema. + + // IPC file format is a wrapper around the streaming format with indexing information. + // See https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format + // + // + // + // + //