diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index fd2b9c73949..76dc06ced80 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -441,24 +441,6 @@ jobs: fetch-depth: 1 submodules: true - - name: Checkout CI Build/Test Scripts - if: needs.check-skip.outputs.should_skip != 'true' - uses: actions/checkout@v4 - with: - repository: apache/cloudberry-devops-release - ref: main - path: cloudberry-devops-release - fetch-depth: 1 - - - name: Move cloudberry-devops-release directory - if: needs.check-skip.outputs.should_skip != 'true' - run: | - set -eo pipefail - if ! mv "${GITHUB_WORKSPACE}"/cloudberry-devops-release "${GITHUB_WORKSPACE}"/..; then - echo "::error::Container initialization failed" - exit 1 - fi - - name: Cloudberry Environment Initialization if: needs.check-skip.outputs.should_skip != 'true' env: @@ -510,8 +492,8 @@ jobs: SRC_DIR: ${{ github.workspace }} run: | set -eo pipefail - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/configure-cloudberry.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -523,8 +505,8 @@ jobs: run: | set -eo pipefail - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/build-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/build-cloudberry.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/build-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh"; then echo "::error::Build script failed" exit 1 fi @@ -606,7 +588,7 @@ jobs: # Create RPM echo "Creating RPM package..." rpmdev-setuptree - ln -s "${SRC_DIR}"/../cloudberry-devops-release/packaging/rpm/el/SPECS/apache-cloudberry-db-incubating.spec "${HOME}"/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec + ln -s "${SRC_DIR}"/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec "${HOME}"/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec cp "${SRC_DIR}"/LICENSE /usr/local/cloudberry-db DEBUG_RPMBUILD_OPT="" @@ -616,7 +598,7 @@ jobs: DEBUG_IDENTIFIER=".debug" fi - "${SRC_DIR}"/../cloudberry-devops-release/scripts/build-rpm.sh --version "${CBDB_VERSION}" --release "${BUILD_NUMBER}" "${DEBUG_RPMBUILD_OPT}" + "${SRC_DIR}"/devops/build/packaging/rpm/build-rpm.sh --version "${CBDB_VERSION}" --release "${BUILD_NUMBER}" "${DEBUG_RPMBUILD_OPT}" # Get OS version and move RPM os_version=$(grep -oP '(?<=^VERSION_ID=")[0-9]' /etc/os-release) @@ -653,8 +635,8 @@ jobs: SRC_DIR: ${{ github.workspace }} run: | set -eo pipefail - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/unittest-cloudberry.sh - if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/unittest-cloudberry.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh"; then echo "::error::Unittest script failed" exit 1 fi @@ -904,24 +886,6 @@ jobs: run: | echo "Timestamp from output: ${{ needs.build.outputs.build_timestamp }}" - - name: Checkout CI Build/Test Scripts - if: needs.check-skip.outputs.should_skip != 'true' - uses: actions/checkout@v4 - with: - repository: apache/cloudberry-devops-release - ref: main - path: cloudberry-devops-release - fetch-depth: 1 - - - name: Move cloudberry-devops-release directory - if: needs.check-skip.outputs.should_skip != 'true' - run: | - set -eo pipefail - if ! mv "${GITHUB_WORKSPACE}"/cloudberry-devops-release "${GITHUB_WORKSPACE}"/..; then - echo "::error::Container initialization failed" - exit 1 - fi - - name: Cloudberry Environment Initialization env: LOGS_DIR: build-logs @@ -1267,8 +1231,8 @@ jobs: set -eo pipefail { - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && NUM_PRIMARY_MIRROR_PAIRS='${{ matrix.num_primary_mirror_pairs }}' SRC_DIR=${SRC_DIR} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && NUM_PRIMARY_MIRROR_PAIRS='${{ matrix.num_primary_mirror_pairs }}' SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"; then echo "::error::Demo cluster creation failed" exit 1 fi @@ -1343,7 +1307,7 @@ jobs: MAKE_DIRECTORY='-C $dir' \ PGOPTIONS='${PG_OPTS}' \ SRC_DIR='${SRC_DIR}' \ - ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/test-cloudberry.sh" \ + ${SRC_DIR}/devops/build/automation/cloudberry/scripts/test-cloudberry.sh" \ 2>&1 | tee "$config_log"; then echo "::warning::Test execution failed for configuration $((i+1)): make -C $dir $target" overall_status=1 @@ -1376,7 +1340,7 @@ jobs: ls -Rl "/tmp/cloudberry-cores" echo "-----------------------------------------" - "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/analyze_core_dumps.sh "$test_id" + "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/analyze_core_dumps.sh "$test_id" core_analysis_rc=$? case "$core_analysis_rc" in 0) echo "No core dumps found for this configuration" ;; @@ -1452,7 +1416,7 @@ jobs: # Parse this configuration's results MAKE_NAME="${{ matrix.test }}-config$i" \ - "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/parse-test-results.sh "$config_log" + "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/parse-test-results.sh "$config_log" status_code=$? 
{ diff --git a/.github/workflows/build-dbg-cloudberry.yml b/.github/workflows/build-dbg-cloudberry.yml index 569e6a350d6..4bc23a5d677 100644 --- a/.github/workflows/build-dbg-cloudberry.yml +++ b/.github/workflows/build-dbg-cloudberry.yml @@ -343,24 +343,6 @@ jobs: fetch-depth: 1 submodules: true - - name: Checkout CI Build/Test Scripts - if: needs.check-skip.outputs.should_skip != 'true' - uses: actions/checkout@v4 - with: - repository: apache/cloudberry-devops-release - ref: main - path: cloudberry-devops-release - fetch-depth: 1 - - - name: Move cloudberry-devops-release directory - if: needs.check-skip.outputs.should_skip != 'true' - run: | - set -eo pipefail - if ! mv "${GITHUB_WORKSPACE}"/cloudberry-devops-release "${GITHUB_WORKSPACE}"/..; then - echo "::error::Container initialization failed" - exit 1 - fi - - name: Cloudberry Environment Initialization if: needs.check-skip.outputs.should_skip != 'true' env: @@ -412,8 +394,8 @@ jobs: SRC_DIR: ${{ github.workspace }} run: | set -eo pipefail - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/configure-cloudberry.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -425,8 +407,8 @@ jobs: run: | set -eo pipefail - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/build-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/build-cloudberry.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/build-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh"; then echo "::error::Build script failed" exit 1 fi @@ -508,7 +490,7 @@ jobs: # Create RPM echo "Creating RPM package..." rpmdev-setuptree - ln -s "${SRC_DIR}"/../cloudberry-devops-release/packaging/rpm/el/SPECS/apache-cloudberry-db-incubating.spec "${HOME}"/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec + ln -s "${SRC_DIR}"/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec "${HOME}"/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec cp "${SRC_DIR}"/LICENSE /usr/local/cloudberry-db DEBUG_RPMBUILD_OPT="" @@ -518,7 +500,7 @@ jobs: DEBUG_IDENTIFIER=".debug" fi - "${SRC_DIR}"/../cloudberry-devops-release/scripts/build-rpm.sh --version "${CBDB_VERSION}" --release "${BUILD_NUMBER}" "${DEBUG_RPMBUILD_OPT}" + "${SRC_DIR}"/devops/build/packaging/rpm/build-rpm.sh --version "${CBDB_VERSION}" --release "${BUILD_NUMBER}" "${DEBUG_RPMBUILD_OPT}" # Get OS version and move RPM os_version=$(grep -oP '(?<=^VERSION_ID=")[0-9]' /etc/os-release) @@ -553,8 +535,8 @@ jobs: SRC_DIR: ${{ github.workspace }} run: | set -eo pipefail - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/unittest-cloudberry.sh - if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/unittest-cloudberry.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh"; then echo "::error::Unittest script failed" exit 1 fi @@ -804,24 +786,6 @@ jobs: run: | echo "Timestamp from output: ${{ needs.build.outputs.build_timestamp }}" - - name: Checkout CI Build/Test Scripts - if: needs.check-skip.outputs.should_skip != 'true' - uses: actions/checkout@v4 - with: - repository: apache/cloudberry-devops-release - ref: main - path: cloudberry-devops-release - fetch-depth: 1 - - - name: Move cloudberry-devops-release directory - if: needs.check-skip.outputs.should_skip != 'true' - run: | - set -eo pipefail - if ! mv "${GITHUB_WORKSPACE}"/cloudberry-devops-release "${GITHUB_WORKSPACE}"/..; then - echo "::error::Container initialization failed" - exit 1 - fi - - name: Cloudberry Environment Initialization env: LOGS_DIR: build-logs @@ -1167,8 +1131,8 @@ jobs: set -eo pipefail { - chmod +x "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && NUM_PRIMARY_MIRROR_PAIRS='${{ matrix.num_primary_mirror_pairs }}' SRC_DIR=${SRC_DIR} ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"; then + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && NUM_PRIMARY_MIRROR_PAIRS='${{ matrix.num_primary_mirror_pairs }}' SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"; then echo "::error::Demo cluster creation failed" exit 1 fi @@ -1239,7 +1203,7 @@ jobs: MAKE_DIRECTORY='-C $dir' \ PGOPTIONS='${PG_OPTS}' \ SRC_DIR='${SRC_DIR}' \ - ${SRC_DIR}/../cloudberry-devops-release/build_automation/cloudberry/scripts/test-cloudberry.sh" \ + ${SRC_DIR}/devops/build/automation/cloudberry/scripts/test-cloudberry.sh" \ 2>&1 | tee "$config_log"; then echo "::warning::Test execution failed for configuration $((i+1)): make -C $dir $target" overall_status=1 @@ -1272,7 +1236,7 @@ jobs: ls -Rl "/tmp/cloudberry-cores" echo "-----------------------------------------" - "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/analyze_core_dumps.sh "$test_id" + "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/analyze_core_dumps.sh "$test_id" core_analysis_rc=$? case "$core_analysis_rc" in 0) echo "No core dumps found for this configuration" ;; @@ -1348,7 +1312,7 @@ jobs: # Parse this configuration's results MAKE_NAME="${{ matrix.test }}-config$i" \ - "${SRC_DIR}"/../cloudberry-devops-release/build_automation/cloudberry/scripts/parse-test-results.sh "$config_log" + "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/parse-test-results.sh "$config_log" status_code=$? 
{ diff --git a/.github/workflows/docker-cbdb-build-containers.yml b/.github/workflows/docker-cbdb-build-containers.yml new file mode 100644 index 00000000000..f5f8676cd4c --- /dev/null +++ b/.github/workflows/docker-cbdb-build-containers.yml @@ -0,0 +1,208 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# GitHub Actions Workflow for Apache Cloudberry Build Environments +# -------------------------------------------------------------------- +# Purpose: +# Builds, tests, and publishes multi-architecture Docker images for +# Apache Cloudberry DB build environments. Images are built for both +# Rocky Linux 8 and 9, tested with TestInfra, and pushed to DockerHub. +# +# Multi-Architecture Support: +# - Builds images for both AMD64 and ARM64 architectures +# - Creates and pushes multi-arch manifests +# - Uses QEMU for cross-platform builds +# - Automated testing for all architectures +# +# Image Tags: +# - Latest: cbdb-build-{platform}-latest +# - Versioned: cbdb-build-{platform}-{YYYYMMDD}-{git-short-sha} +# +# Features: +# - Matrix build for multiple platforms +# - Parallel architecture builds +# - Build caching strategy +# - Path filtering to only build changed platforms +# - Comprehensive build summary and metadata +# - Container testing with TestInfra +# - Multi-arch manifest creation +# +# Requirements: +# - DockerHub credentials in GitHub secrets +# - DOCKERHUB_USER +# - DOCKERHUB_TOKEN +# -------------------------------------------------------------------- + +name: docker-cbdb-build-containers + +# Trigger workflow on pushes to main when relevant paths change +# Also allows manual triggering via GitHub UI +on: + push: + branches: + - main + paths: + - 'devops/deploy/docker/build/rocky8/**' + - 'devops/deploy/docker/build/rocky9/**' + workflow_dispatch: # Manual trigger + +# Prevent multiple workflow runs from interfering with each other +concurrency: + group: docker-build-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-and-push: + # Set timeout to prevent hanging builds + timeout-minutes: 60 + runs-on: ubuntu-latest + + # Matrix strategy to build for both Rocky Linux 8 and 9 + strategy: + matrix: + platform: ['rocky8', 'rocky9'] + + steps: + # Checkout repository code with full history + - name: Checkout code + uses: actions/checkout@v4 + + # Generate version information for image tags + # - BUILD_DATE: Current date in YYYYMMDD format + # - SHA_SHORT: Short form of the git commit SHA + - name: Set version + id: version + run: | + echo "BUILD_DATE=$(date -u +'%Y%m%d')" >> $GITHUB_OUTPUT + echo "SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + # Determine if the current platform's 
files have changed + # This prevents unnecessary builds if only one platform was modified + - name: Determine if platform changed + id: platform-filter + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 + with: + filters: | + rocky8: + - 'devops/deploy/docker/build/rocky8/**' + rocky9: + - 'devops/deploy/docker/build/rocky9/**' + + # Set up QEMU for multi-architecture support + # This allows building ARM64 images on AMD64 infrastructure and vice versa + - name: Set up QEMU + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/setup-qemu-action@v3 + + # Login to DockerHub for pushing images + # Requires DOCKERHUB_USER and DOCKERHUB_TOKEN secrets to be set + - name: Login to Docker Hub + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + # Setup Docker Buildx for efficient builds + # Enable debug mode for better troubleshooting + - name: Set up Docker Buildx + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/setup-buildx-action@v3 + with: + buildkitd-flags: --debug + + # Build and test images for each architecture + # This ensures both AMD64 and ARM64 variants work correctly + - name: Build and test images + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + run: | + # Build for each platform + for arch in amd64 arm64; do + # Build the image for testing + docker buildx build \ + --platform linux/$arch \ + --load \ + -t apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-$arch-test \ + ./devops/deploy/docker/build/${{ matrix.platform }} + + # Run tests in a container + docker run -d \ + -h cdw \ + --name cbdb-build-${{ matrix.platform }}-$arch-test \ + apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-$arch-test \ + bash -c "sleep 30" + + # Execute TestInfra tests + docker exec cbdb-build-${{ matrix.platform }}-$arch-test pytest \ + --cache-clear \ + --disable-warnings \ + -p no:warnings \ + /tests/testinfra/test_cloudberry_db_env.py + + # Cleanup test container + docker rm -f cbdb-build-${{ matrix.platform }}-$arch-test + done + + # Build and push multi-architecture images + # This creates a manifest list that supports both architectures + - name: Build and Push Multi-arch Docker images + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/build-push-action@v6 + with: + context: ./devops/deploy/docker/build/${{ matrix.platform }} + push: true + platforms: linux/amd64,linux/arm64 + # Tag with both latest and version-specific tags + tags: | + apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-latest + apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-${{ steps.version.outputs.BUILD_DATE }}-${{ steps.version.outputs.SHA_SHORT }} + # Add standard Open Container Initiative (OCI) labels + labels: | + org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.revision=${{ github.sha }} + org.opencontainers.image.created=${{ steps.version.outputs.BUILD_DATE }} + org.opencontainers.image.version=${{ steps.version.outputs.BUILD_DATE }}-${{ steps.version.outputs.SHA_SHORT }} + + # Generate a detailed build summary in GitHub Actions UI + # This provides quick access to build information and image usage instructions + - name: Build Summary + if: always() + run: | + echo "### Build Summary for ${{ matrix.platform }} 🚀" >> 
$GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### 🔍 Build Information" >> $GITHUB_STEP_SUMMARY + echo "- **Build Status**: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY + echo "- **Platform**: ${{ matrix.platform }}" >> $GITHUB_STEP_SUMMARY + echo "- **Architectures**: amd64, arm64" >> $GITHUB_STEP_SUMMARY + echo "- **Commit SHA**: [\`${{ github.sha }}\`](${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }})" >> $GITHUB_STEP_SUMMARY + echo "- **Build Date**: ${{ steps.version.outputs.BUILD_DATE }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY + echo "- Latest tag: \`apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-latest\`" >> $GITHUB_STEP_SUMMARY + echo "- Version tag: \`apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-${{ steps.version.outputs.BUILD_DATE }}-${{ steps.version.outputs.SHA_SHORT }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### 📋 Quick Reference" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY + echo "# Pull the image (automatically selects correct architecture)" >> $GITHUB_STEP_SUMMARY + echo "docker pull apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-latest" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "# Pull specific architecture if needed" >> $GITHUB_STEP_SUMMARY + echo "docker pull --platform linux/amd64 apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-latest" >> $GITHUB_STEP_SUMMARY + echo "docker pull --platform linux/arm64 apache/incubator-cloudberry:cbdb-build-${{ matrix.platform }}-latest" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/docker-cbdb-test-containers.yml b/.github/workflows/docker-cbdb-test-containers.yml new file mode 100644 index 00000000000..87fcf245edc --- /dev/null +++ b/.github/workflows/docker-cbdb-test-containers.yml @@ -0,0 +1,182 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# +# Purpose: Builds, tests and pushes multi-architecture Docker images for +# Apache Cloudberry DB test environments. Images are built for both AMD64 +# and ARM64 architectures on Rocky Linux 8 and 9. 
+# +# Images are tagged with: +# - cbdb-test-rocky8-latest +# - cbdb-test-rocky8-{YYYYMMDD}-{git-short-sha} +# - cbdb-test-rocky9-latest +# - cbdb-test-rocky9-{YYYYMMDD}-{git-short-sha} +# +# Features: +# - Multi-architecture support (AMD64 and ARM64) +# - Matrix build for multiple platforms +# - QEMU emulation for cross-platform builds +# - Buildx for efficient multi-arch builds +# - Path filtering to only build changed platforms +# - Comprehensive build summary and metadata +# +# -------------------------------------------------------------------- + +name: docker-cbdb-test-containers + +# Trigger on pushes to the main branch when relevant paths change +# Also allows manual triggering via GitHub UI +on: + push: + branches: + - main + paths: + - 'devops/deploy/docker/test/rocky8/**' + - 'devops/deploy/docker/test/rocky9/**' + workflow_dispatch: # Manual trigger + +# Prevent multiple workflow runs from interfering with each other +concurrency: + group: docker-test-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-and-push: + timeout-minutes: 60 # Prevent hanging builds + runs-on: ubuntu-latest + strategy: + matrix: + # Build for both Rocky Linux 8 and 9 + platform: ['rocky8', 'rocky9'] + + steps: + # Checkout repository code + - name: Checkout code + uses: actions/checkout@v4 + + # Generate version information for image tags + - name: Set version + id: version + run: | + echo "BUILD_DATE=$(date -u +'%Y%m%d')" >> $GITHUB_OUTPUT + echo "SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + # Determine if the current platform's files have changed + - name: Determine if platform changed + id: platform-filter + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 + with: + filters: | + rocky8: + - 'devops/deploy/docker/test/rocky8/**' + rocky9: + - 'devops/deploy/docker/test/rocky9/**' + + # Skip if no changes for current platform + - name: Skip if not relevant + if: ${{ steps.platform-filter.outputs[matrix.platform] != 'true' }} + run: echo "Skipping because the changes do not affect this platform" + + # Set up QEMU for multi-architecture support + # This allows building ARM64 images on AMD64 infrastructure and vice versa + - name: Set up QEMU + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/setup-qemu-action@v3 + + # Login to DockerHub for pushing images + - name: Login to Docker Hub + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + # Setup Docker Buildx for efficient multi-architecture builds + - name: Set up Docker Buildx + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/setup-buildx-action@v3 + with: + buildkitd-flags: --debug + + # Build and test images for each architecture + # This ensures both AMD64 and ARM64 variants work correctly + - name: Build and test images + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + run: | + # Build for each platform + for arch in amd64 arm64; do + echo "Building for $arch architecture..."
+ docker buildx build \ + --platform linux/$arch \ + --load \ + -t apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-$arch-test \ + ./devops/deploy/docker/test/${{ matrix.platform }} + done + + # Build and push multi-architecture images + # Creates a manifest list that supports both architectures + - name: Build and Push Multi-arch Docker images + if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} + uses: docker/build-push-action@v6 + with: + context: ./devops/deploy/docker/test/${{ matrix.platform }} + push: true + platforms: linux/amd64,linux/arm64 + # Use caching for faster builds + cache-from: | + type=registry,ref=apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-latest + type=gha,scope=docker-cbdb-test-${{ matrix.platform }} + # Tag with both latest and version-specific tags + tags: | + apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-latest + apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-${{ steps.version.outputs.BUILD_DATE }}-${{ steps.version.outputs.SHA_SHORT }} + # Add metadata labels for better image tracking + labels: | + org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.revision=${{ github.sha }} + org.opencontainers.image.created=${{ steps.version.outputs.BUILD_DATE }} + org.opencontainers.image.version=${{ steps.version.outputs.BUILD_DATE }}-${{ steps.version.outputs.SHA_SHORT }} + + # Generate a detailed build summary in GitHub Actions UI + # This provides quick access to build information and image usage instructions + - name: Build Summary + if: always() + run: | + echo "### Build Summary for ${{ matrix.platform }} 🚀" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### 🔍 Build Information" >> $GITHUB_STEP_SUMMARY + echo "- **Build Status**: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY + echo "- **Platform**: ${{ matrix.platform }}" >> $GITHUB_STEP_SUMMARY + echo "- **Architectures**: AMD64, ARM64" >> $GITHUB_STEP_SUMMARY + echo "- **Commit SHA**: [\`${{ github.sha }}\`](${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }})" >> $GITHUB_STEP_SUMMARY + echo "- **Build Date**: ${{ steps.version.outputs.BUILD_DATE }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY + echo "- Latest tag: \`apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-latest\`" >> $GITHUB_STEP_SUMMARY + echo "- Version tag: \`apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-${{ steps.version.outputs.BUILD_DATE }}-${{ steps.version.outputs.SHA_SHORT }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### 📋 Quick Reference" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY + echo "# Pull the image (automatically selects correct architecture)" >> $GITHUB_STEP_SUMMARY + echo "docker pull apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-latest" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "# Pull specific architecture if needed" >> $GITHUB_STEP_SUMMARY + echo "docker pull --platform linux/amd64 apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-latest" >> $GITHUB_STEP_SUMMARY + echo "docker pull --platform linux/arm64 apache/incubator-cloudberry:cbdb-test-${{ matrix.platform }}-latest" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY diff --git a/configure.ac b/configure.ac index 890e8437cdf..0d0529fc35f 100644 --- a/configure.ac +++ b/configure.ac @@ -20,7 +20,7 @@ 
m4_pattern_forbid(^PGAC_)dnl to catch undefined macros dnl The PACKAGE_VERSION from upstream PostgreSQL is maintained in the dnl PG_PACKAGE_VERSION variable, when merging make sure to update this dnl variable with the merge conflict from the AC_INIT() statement. -AC_INIT([Apache Cloudberry], [2.1.0-devel], [dev@cloudberry.apache.org], [], [https://cloudberry.apache.org/]) +AC_INIT([Apache Cloudberry], [3.0.0-devel], [dev@cloudberry.apache.org], [], [https://cloudberry.apache.org/]) [PG_PACKAGE_VERSION=14.4] AC_SUBST(PG_PACKAGE_VERSION) diff --git a/contrib/auto_explain/expected/auto_explain_optimizer.out b/contrib/auto_explain/expected/auto_explain_optimizer.out index e1d3e334d95..1f469cc45fc 100644 --- a/contrib/auto_explain/expected/auto_explain_optimizer.out +++ b/contrib/auto_explain/expected/auto_explain_optimizer.out @@ -38,7 +38,7 @@ LOG: duration: 0.026 ms plan: Query Text: SELECT relname FROM pg_class WHERE relname='pg_class'; Index Only Scan using pg_class_relname_nsp_index on pg_class (cost=0.15..4.17 rows=1 width=64) (actual rows=1 loops=1) Index Cond: (relname = 'pg_class'::name) - Heap Fetches: 1 + Heap Fetches: 0 (slice0) Executor memory: 105K bytes. Memory used: 128000kB relname diff --git a/contrib/pax_storage/.gitignore b/contrib/pax_storage/.gitignore index 51a328f84e0..87aa2a4a742 100644 --- a/contrib/pax_storage/.gitignore +++ b/contrib/pax_storage/.gitignore @@ -12,6 +12,7 @@ Thumbs.db # Temp files dir +bench_data .tmp/** build*/** results/** diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc index da07cddd5d7..6dcaecd3205 100644 --- a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc @@ -453,6 +453,7 @@ uint32 PaxAccessMethod::ScanFlags(Relation relation) { flags |= SCAN_FORCE_BIG_WRITE_LOCK; #endif + flags |= SCAN_SUPPORT_RUNTIME_FILTER; return flags; } diff --git a/contrib/pax_storage/src/cpp/access/pax_scanner.cc b/contrib/pax_storage/src/cpp/access/pax_scanner.cc index a5e0b632002..5a354e6fa0c 100644 --- a/contrib/pax_storage/src/cpp/access/pax_scanner.cc +++ b/contrib/pax_storage/src/cpp/access/pax_scanner.cc @@ -218,7 +218,7 @@ bool PaxScanDesc::BitmapNextTuple(struct TBMIterateResult *tbmres, } TableScanDesc PaxScanDesc::BeginScan(Relation relation, Snapshot snapshot, - int nkeys, struct ScanKeyData * /*key*/, + int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, uint32 flags, std::shared_ptr<PaxFilter> &&pax_filter, bool build_bitmap) { @@ -326,8 +326,8 @@ void PaxScanDesc::EndScan() { } TableScanDesc PaxScanDesc::BeginScanExtractColumns( - Relation rel, Snapshot snapshot, int /*nkeys*/, - struct ScanKeyData * /*key*/, ParallelTableScanDesc parallel_scan, + Relation rel, Snapshot snapshot, int nkeys, + struct ScanKeyData *key, ParallelTableScanDesc parallel_scan, struct PlanState *ps, uint32 flags) { std::shared_ptr<PaxFilter> filter; List *targetlist = ps->plan->targetlist; @@ -361,7 +361,7 @@ TableScanDesc PaxScanDesc::BeginScanExtractColumns( filter->SetColumnProjection(std::move(col_bits)); if (pax_enable_sparse_filter) { - filter->InitSparseFilter(rel, qual); + filter->InitSparseFilter(rel, qual, key, nkeys); // FIXME: enable predicate pushdown can filter rows immediately without // assigning all columns. But it may mess the filter orders for multiple @@ -375,7 +375,7 @@ TableScanDesc PaxScanDesc::BeginScanExtractColumns( filter->InitRowFilter(rel, ps, filter->GetColumnProjection()); } } - return BeginScan(rel, snapshot, 0, nullptr, parallel_scan, flags, + return BeginScan(rel, snapshot, nkeys, key, parallel_scan, flags, std::move(filter), build_bitmap); }
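With SCAN_SUPPORT_RUNTIME_FILTER advertised and the executor's ScanKeys now forwarded into InitSparseFilter, sparse filtering can consult key predicates when deciding which micro-partitions to read. The PaxFilter internals are not part of this diff; as a rough, self-contained sketch of the idea only (every name below is illustrative, not the PAX API):

```cpp
#include <cstdint>
#include <vector>

// Hypothetical per-micro-partition statistics: min/max of one column.
struct BlockStats { int64_t min; int64_t max; };

// Simplified scan key: predicate "value <op> arg" on that column.
enum class Op { kLess, kEq, kGreater };
struct SimpleScanKey { Op op; int64_t arg; };

// A block can be skipped when its [min, max] range proves the
// predicate can never match any row inside it.
bool BlockCanMatch(const BlockStats &s, const SimpleScanKey &k) {
  switch (k.op) {
    case Op::kLess:    return s.min < k.arg;  // some value below arg?
    case Op::kEq:      return k.arg >= s.min && k.arg <= s.max;
    case Op::kGreater: return s.max > k.arg;  // some value above arg?
  }
  return true;  // unknown op: never skip
}

// Keep only the blocks that might contain matches; every key must pass.
std::vector<int> SelectBlocks(const std::vector<BlockStats> &blocks,
                              const std::vector<SimpleScanKey> &keys) {
  std::vector<int> selected;
  for (int i = 0; i < static_cast<int>(blocks.size()); ++i) {
    bool match = true;
    for (const auto &k : keys)
      if (!BlockCanMatch(blocks[i], k)) { match = false; break; }
    if (match) selected.push_back(i);
  }
  return selected;
}
```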
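+ // Parentless (NULL) context; the min, init, and max block sizes are all + // fixed at 80 MB for the benchmark run.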
+ MemoryContext test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "TestMemoryContext", 80 * 1024 * 1024, + 80 * 1024 * 1024, 80 * 1024 * 1024); + MemoryContextSwitchTo(test_memory_context); +} + +// Global registry +class BenchmarkRegistry { + private: + std::vector<InitFunction> init_functions_; + std::vector<CleanupFunction> cleanup_functions_; + bool initialized_ = false; + + public: + void RegisterInitFunction(InitFunction func) { + init_functions_.push_back(func); + } + + void RegisterCleanupFunction(CleanupFunction func) { + cleanup_functions_.push_back(func); + } + + void RunAllInitFunctions() { + if (initialized_) return; + + printf("Running PAX Benchmark Suite...\n"); + printf("Initializing all benchmark modules...\n\n"); + + for (const auto &func : init_functions_) { + func(); + } + initialized_ = true; + } + + void RunAllCleanupFunctions() { + if (!initialized_) return; + + printf("\nCleaning up all benchmark modules...\n"); + + // Cleanup functions executed in reverse order + for (auto it = cleanup_functions_.rbegin(); it != cleanup_functions_.rend(); + ++it) { + (*it)(); + } + initialized_ = false; + } +}; + +// Global registry access function +BenchmarkRegistry &GetBenchmarkRegistry() { + static BenchmarkRegistry instance; + return instance; +} + +// Registration functions +void RegisterBenchmarkInit(InitFunction func) { + GetBenchmarkRegistry().RegisterInitFunction(func); +} + +void RegisterBenchmarkCleanup(CleanupFunction func) { + GetBenchmarkRegistry().RegisterCleanupFunction(func); +} + +// Global Mock functions for benchmark framework +bool MockMinMaxGetStrategyProcinfo(Oid, Oid, Oid *, FmgrInfo *, + StrategyNumber) { + return false; +} + +int32 MockGetFastSequences(Oid) { + static int32 mock_id = 0; + return mock_id++; +} + +void MockInsertMicroPartitionPlaceHolder(Oid, int) {} +void MockDeleteMicroPartitionEntry(Oid, Snapshot, int) {} +void MockExecStoreVirtualTuple(TupleTableSlot *) {} + +std::string MockBuildPaxDirectoryPath(RelFileNode rnode, BackendId backend_id) { + // Create a simple file path for benchmarks + return std::string("./bench_data"); +} + +std::vector<int> MockGetMinMaxColumnIndexes(Relation) { + return std::vector<int>(); +} + +std::vector<int> MockBloomFilterColumnIndexes(Relation) { + return std::vector<int>(); +} + +std::vector<std::tuple<ColumnEncoding_Kind, int>> MockGetRelEncodingOptions( + Relation relation) { + std::vector<std::tuple<ColumnEncoding_Kind, int>> encoding_opts; + + // Get number of columns from relation + int num_columns = 10; // default for benchmark + if (relation && relation->rd_att) { + num_columns = relation->rd_att->natts; + } + + // Create encoding options for each column (NO_ENCODED, 0) + for (int i = 0; i < num_columns; i++) { + encoding_opts.emplace_back( + std::make_tuple(ColumnEncoding_Kind_NO_ENCODED, 0)); + } + + return encoding_opts; +} + +// Mock TupleDescInitEntry that doesn't rely on SYSCACHE +void MockTupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, + const char *attributeName, Oid oidtypeid, + int32 typmod, int attdim) { + // Basic validation + if (attributeNumber < 1 || attributeNumber > desc->natts) { + return; + } + + Form_pg_attribute att = TupleDescAttr(desc, attributeNumber - 1); + + // Set basic attribute properties + namestrcpy(&(att->attname), attributeName); + att->atttypid = oidtypeid; + att->atttypmod = typmod; + att->attndims = attdim; + att->attnum = attributeNumber; + att->attnotnull = false; + att->atthasdef = false; + att->attidentity = '\0'; + att->attgenerated = '\0'; + att->attisdropped = false; + att->attislocal = true; + att->attinhcount = 0; + att->attcollation = InvalidOid; +
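+ // SYSCACHE lookups are unavailable in this harness, so the switch below + // hardcodes each supported OID's pg_type properties (attlen, attbyval, + // attalign, attstorage).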
+ // Set type-specific properties based on OID (hardcoded for common types) + switch (oidtypeid) { + case INT2OID: // smallint + att->attlen = 2; + att->attalign = 's'; + att->attstorage = 'p'; + att->attbyval = true; + break; + case INT4OID: // integer + att->attlen = 4; + att->attalign = 'i'; + att->attstorage = TYPSTORAGE_PLAIN; + att->attbyval = true; + break; + case INT8OID: // bigint + att->attlen = 8; + att->attalign = 'd'; + att->attstorage = TYPSTORAGE_PLAIN; + att->attbyval = FLOAT8PASSBYVAL; + break; + case FLOAT8OID: // double precision + att->attlen = 8; + att->attalign = 'd'; + att->attstorage = 'p'; + att->attbyval = FLOAT8PASSBYVAL; + break; + case BOOLOID: // boolean + att->attlen = 1; + att->attalign = 'c'; + att->attstorage = 'p'; + att->attbyval = true; + break; + case TEXTOID: // text + att->attlen = -1; + att->attalign = 'i'; + att->attstorage = TYPSTORAGE_PLAIN; + att->attbyval = false; + att->attcollation = DEFAULT_COLLATION_OID; + break; + case NUMERICOID: // numeric + att->attlen = -1; + att->attalign = TYPALIGN_INT; + att->attstorage = TYPSTORAGE_PLAIN; + att->attbyval = false; + break; + case TIMESTAMPOID: // timestamp + att->attlen = 8; + att->attalign = 'd'; + att->attstorage = TYPSTORAGE_PLAIN; + att->attbyval = FLOAT8PASSBYVAL; + break; + default: + // Default values for unknown types + att->attlen = -1; + att->attalign = 'i'; + att->attstorage = 'p'; + att->attbyval = false; + break; + } +} + +// Global initialization function for general benchmark framework +void GlobalBenchmarkInit() { + static bool global_initialized = false; + if (global_initialized) return; + + printf("Initializing PAX benchmark framework...\n"); + + // Initialize memory context + MemoryContextInit(); + + // Setup global Mock functions + static std::unique_ptr<Stub> stub_global = std::make_unique<Stub>(); + + stub_global->set(MinMaxGetPgStrategyProcinfo, MockMinMaxGetStrategyProcinfo); + stub_global->set(CPaxGetFastSequences, MockGetFastSequences); + stub_global->set(cbdb::BuildPaxDirectoryPath, MockBuildPaxDirectoryPath); + stub_global->set(cbdb::InsertMicroPartitionPlaceHolder, + MockInsertMicroPartitionPlaceHolder); + stub_global->set(cbdb::DeleteMicroPartitionEntry, + MockDeleteMicroPartitionEntry); + stub_global->set(cbdb::GetMinMaxColumnIndexes, MockGetMinMaxColumnIndexes); + stub_global->set(cbdb::GetBloomFilterColumnIndexes, + MockBloomFilterColumnIndexes); + stub_global->set(cbdb::GetRelEncodingOptions, MockGetRelEncodingOptions); + stub_global->set(ExecStoreVirtualTuple, MockExecStoreVirtualTuple); + stub_global->set(TupleDescInitEntry, MockTupleDescInitEntry); + + // Create basic test directory + system("mkdir -p ./bench_data"); + + global_initialized = true; + printf("PAX benchmark framework initialized.\n"); +} + +// Global cleanup function for general benchmark framework +void GlobalBenchmarkCleanup() { + printf("Cleaning up PAX benchmark framework...\n"); + + // Clean up test directory + // system("rm -rf ./bench_data"); + + // Reset memory context + if (TopMemoryContext) { + MemoryContextReset(TopMemoryContext); + } + + printf("PAX benchmark framework cleaned up.\n"); +} + +// Example benchmark test +static void example_benchmark(::benchmark::State &state) { for (auto _ : state) { + // Empty example test } } BENCHMARK(example_benchmark); -BENCHMARK_MAIN(); \ No newline at end of file +} // namespace pax::bench + +// Global cleanup function (C-style for atexit) +static void cleanup_all() { + pax::bench::GetBenchmarkRegistry().RunAllCleanupFunctions();
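+ // Module cleanups run first; the framework teardown below resets the + // memory context they may still reference.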
+ pax::bench::GlobalBenchmarkCleanup(); +} + +// Main entry function +int main(int argc, char **argv) { + // Register global cleanup function + std::atexit(cleanup_all); + + // Global initialization + pax::bench::GlobalBenchmarkInit(); + + // Run all registered initialization functions + pax::bench::GetBenchmarkRegistry().RunAllInitFunctions(); + + // Initialize benchmark framework + ::benchmark::Initialize(&argc, argv); + if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; + + printf("\n=== Starting PAX Benchmark Suite ===\n"); + printf("Use --benchmark_filter=<regex> to run specific tests\n"); + printf("Use --benchmark_list_tests to see all available tests\n\n"); + + // Run benchmark + ::benchmark::RunSpecifiedBenchmarks(); + + return 0; +} \ No newline at end of file diff --git a/contrib/pax_storage/src/cpp/pax_gbench.h b/contrib/pax_storage/src/cpp/pax_gbench.h new file mode 100644 index 00000000000..44376022693 --- /dev/null +++ b/contrib/pax_storage/src/cpp/pax_gbench.h @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ * + * pax_gbench.h + * + * IDENTIFICATION + * contrib/pax_storage/src/cpp/pax_gbench.h + * + *------------------------------------------------------------------------- + */ + +#pragma once + +#include <benchmark/benchmark.h> +#include <functional> + +namespace pax { + +namespace bench { + +// Generic initialization and cleanup function types +using InitFunction = std::function<void()>; +using CleanupFunction = std::function<void()>; + +// Create memory context for benchmark +extern void CreateMemoryContext(); + +// Forward declaration +class BenchmarkRegistry; + +// Global registry access function +BenchmarkRegistry &GetBenchmarkRegistry(); + +// Global initialization and cleanup functions +void GlobalBenchmarkInit(); +void GlobalBenchmarkCleanup(); + +// Registration functions (implemented in pax_gbench.cc) +void RegisterBenchmarkInit(InitFunction func); +void RegisterBenchmarkCleanup(CleanupFunction func); + +} // namespace bench +} // namespace pax + +// Convenient registration macros. The two-level concat lets __COUNTER__ +// expand before token pasting; pasting it directly would yield the same +// identifier on every use. +#define PAX_BENCH_CONCAT_INNER(a, b) a##b +#define PAX_BENCH_CONCAT(a, b) PAX_BENCH_CONCAT_INNER(a, b) + +#define REGISTER_BENCHMARK_INIT(func) \ + static bool PAX_BENCH_CONCAT(BENCHMARK_INIT_, __COUNTER__) = []() { \ + pax::bench::RegisterBenchmarkInit(func); \ + return true; \ + }() + +#define REGISTER_BENCHMARK_CLEANUP(func) \ + static bool PAX_BENCH_CONCAT(BENCHMARK_CLEANUP_, __COUNTER__) = []() { \ + pax::bench::RegisterBenchmarkCleanup(func); \ + return true; \ + }() diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc index dfd346ef615..f39e453cfee 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc @@ -697,7 +697,6 @@ TEST_P(PaxNonFixedColumnCompressTest, auto number = ::testing::get<0>(GetParam()); auto kind = ::testing::get<1>(GetParam()); auto verify_range = ::testing::get<2>(GetParam()); - auto enable_offsets_encoding = ::testing::get<2>(GetParam()); const size_t number_of_rows = 1024; PaxEncoder::EncodingOption encoding_option; @@ -705,10 +704,9 @@ TEST_P(PaxNonFixedColumnCompressTest, encoding_option.compress_level = 5; encoding_option.is_sign = true; - if (enable_offsets_encoding) { - encoding_option.offsets_encode_type = kind; - encoding_option.offsets_compress_level = 5; - } + encoding_option.offsets_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA; + encoding_option.offsets_compress_level = 5; + non_fixed_column = new PaxNonFixedEncodingColumn( number_of_rows, number_of_rows, std::move(encoding_option)); @@ -744,10 +742,9 @@ TEST_P(PaxNonFixedColumnCompressTest, decoding_option.is_sign = true; decoding_option.compress_level = 5; - if (enable_offsets_encoding) { - decoding_option.offsets_encode_type = kind; - decoding_option.offsets_compress_level = 5; - } + decoding_option.offsets_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA; + decoding_option.offsets_compress_level = 5; + auto non_fixed_column_for_read = new PaxNonFixedEncodingColumn( number_of_rows * number, sizeof(int32) * number_of_rows, @@ -801,6 +798,9 @@ INSTANTIATE_TEST_SUITE_P( PaxColumnEncodingTestCombine, PaxColumnCompressTest, testing::Combine(testing::Values(16, 32, 64), testing::Values(ColumnEncoding_Kind_NO_ENCODED, +#ifdef USE_LZ4 + ColumnEncoding_Kind_COMPRESS_LZ4, +#endif ColumnEncoding_Kind_COMPRESS_ZSTD, ColumnEncoding_Kind_COMPRESS_ZLIB))); @@ -808,6 +808,9 @@ INSTANTIATE_TEST_SUITE_P( PaxColumnEncodingTestCombine, PaxNonFixedColumnCompressTest, testing::Combine(testing::Values(16, 32, 64), testing::Values(ColumnEncoding_Kind_NO_ENCODED, +#ifdef USE_LZ4 +
ColumnEncoding_Kind_COMPRESS_LZ4, +#endif ColumnEncoding_Kind_COMPRESS_ZSTD, ColumnEncoding_Kind_COMPRESS_ZLIB), testing::Values(true, false), diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc index 87a34cbb6d7..f4bae52ea7d 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc @@ -50,6 +50,12 @@ std::shared_ptr<PaxCompressor> PaxCompressor::CreateBlockCompressor( compressor = std::make_shared<PaxZlibCompressor>(); break; } +#ifdef USE_LZ4 + case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4: { + compressor = std::make_shared<PaxLZ4Compressor>(); + break; + } +#endif case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: { CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError, fmt("Invalid compress type %d", @@ -230,9 +236,12 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) { } size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap, - void *src_buff, size_t src_len, int /*lvl*/) { - return LZ4_compress_default((char *)src_buff, (char *)dst_buff, src_len, - dst_cap); + void *src_buff, size_t src_len, int lvl) { + // Acceleration trades compression ratio for speed: the larger the + // acceleration value, the faster but less compact the compression. + int acceleration = (20 - lvl) / 6; + return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len, + dst_cap, acceleration); } size_t PaxLZ4Compressor::Decompress(void *dst_buff, size_t dst_len,
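For reference, the change above folds the generic compression level into LZ4's acceleration knob. A minimal sketch of the arithmetic, using the (20 - lvl) / 6 formula from the hunk (LZ4_compress_fast documents that acceleration values below 1 are replaced by the default of 1):

```cpp
#include <cstdio>

// Mirror of the mapping in PaxLZ4Compressor::Compress: the compression
// "level" shared with ZSTD/zlib becomes LZ4's acceleration, where a
// larger acceleration means faster but weaker compression.
static int LevelToAcceleration(int lvl) { return (20 - lvl) / 6; }

int main() {
  // lvl 1 -> 3, lvl 5 -> 2 (the level the benchmarks pass), lvl 11 -> 1,
  // lvl 17..19 -> 0, which LZ4_compress_fast clamps back up to 1.
  for (int lvl : {1, 5, 11, 17, 19})
    std::printf("lvl=%d acceleration=%d\n", lvl, LevelToAcceleration(lvl));
  return 0;
}
```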
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress_bench.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress_bench.cc new file mode 100644 index 00000000000..0a792601e99 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress_bench.cc @@ -0,0 +1,421 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * pax_compress_bench.cc + * + * IDENTIFICATION + * contrib/pax_storage/src/cpp/storage/columns/pax_compress_bench.cc + * + *------------------------------------------------------------------------- + */ + +#include <sys/stat.h> +#include <unistd.h> + +#include <cerrno> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <fstream> +#include <iostream> +#include <memory> +#include <random> +#include <vector> + +#include <benchmark/benchmark.h> + +#include "comm/cbdb_wrappers.h" +#include "comm/pax_memory.h" +#include "pax_gbench.h" +#include "storage/columns/pax_compress.h" +#include "storage/columns/pax_decoding.h" +#include "storage/columns/pax_delta_encoding.h" +#include "storage/columns/pax_rlev2_encoding.h" +#include "storage/pax_buffer.h" + +namespace pax::bench { + +namespace { + +// Test data and prebuilt buffers for decode/decompress benchmarks +static const size_t kCount = 1024 * 1024; +static std::vector<uint32_t> g_offsets; +static std::unique_ptr<char[]> g_raw_bytes; +static size_t g_raw_len = 0; + +static std::vector<char> g_rle_encoded; +static size_t g_rle_len = 0; + +static std::vector<char> g_delta_encoded; +static size_t g_delta_len = 0; + +static std::unique_ptr<char[]> g_zstd_compressed; +static size_t g_zstd_len = 0; + +static std::shared_ptr<PaxCompressor> g_zstd; + +// Simple helpers for bench data persistence +static void EnsureDirExists(const char *dir_path) { + if (mkdir(dir_path, 0755) != 0) { + if (errno != EEXIST) { + std::cerr << "Failed to create directory: " << dir_path << std::endl; + std::abort(); + } + } +} + +static bool ReadWholeFile(const char *path, std::vector<char> &out) { + std::ifstream in(path, std::ios::binary); + if (!in.is_open()) return false; + in.seekg(0, std::ios::end); + std::streampos size = in.tellg(); + if (size <= 0) return false; + out.resize(static_cast<size_t>(size)); + in.seekg(0, std::ios::beg); + in.read(out.data(), size); + return static_cast<bool>(in); +} + +static bool ReadWholeFile(const char *path, std::unique_ptr<char[]> &out, + size_t &out_len) { + std::ifstream in(path, std::ios::binary); + if (!in.is_open()) return false; + in.seekg(0, std::ios::end); + std::streampos size = in.tellg(); + if (size <= 0) return false; + out_len = static_cast<size_t>(size); + out = std::make_unique<char[]>(out_len); + in.seekg(0, std::ios::beg); + in.read(out.get(), size); + return static_cast<bool>(in); +} + +static void WriteWholeFile(const char *path, const char *data, size_t len) { + std::ofstream out(path, std::ios::binary | std::ios::trunc); + if (!out.is_open()) { + std::cerr << "Failed to open file for write: " << path << std::endl; + std::abort(); + } + out.write(data, static_cast<std::streamsize>(len)); + if (!out) { + std::cerr << "Failed to write file: " << path << std::endl; + std::abort(); + } +} + +static const char *kBenchDataDir = "bench_data"; +static const char *kRLEV2Path = "bench_data/rle_v2_u32.bin"; +static const char *kDeltaPath = "bench_data/delta_u32.bin"; +static const char *kZSTDPath = "bench_data/zstd_u32.bin"; +static const char *kRawPath = "bench_data/raw_u32.bin"; + +static std::vector<uint32_t> GenerateMonotonicOffsets(size_t n, uint32_t seed) { + std::vector<uint32_t> offsets; + offsets.resize(n); + offsets[0] = 0; + std::mt19937 rng(seed); + std::uniform_int_distribution<uint32_t> step_dist(1, 256); + for (size_t i = 1; i < n; ++i) { + offsets[i] = offsets[i - 1] + static_cast<uint32_t>(step_dist(rng)); + } + return offsets; +} + +// Lazily ensure raw bytes are available (prefer loading from disk) +static void EnsureRawData() { + if (g_raw_len != 0 && g_raw_bytes) return; + EnsureDirExists(kBenchDataDir); + std::vector<char> raw_from_file; + if (ReadWholeFile(kRawPath, raw_from_file)) { + g_raw_len = raw_from_file.size(); + g_raw_bytes = std::make_unique<char[]>(g_raw_len);
+ std::memcpy(g_raw_bytes.get(), raw_from_file.data(), g_raw_len); + return; + } + // Fallback: generate and persist + g_offsets = GenerateMonotonicOffsets(kCount, /*seed=*/12345); + g_raw_len = g_offsets.size() * sizeof(uint32_t); + g_raw_bytes = std::make_unique<char[]>(g_raw_len); + std::memcpy(g_raw_bytes.get(), g_offsets.data(), g_raw_len); + WriteWholeFile(kRawPath, g_raw_bytes.get(), g_raw_len); +} + +// Lazily ensure RLEv2 encoded buffer exists (load or build from raw) +static void EnsureRleEncoded() { + if (g_rle_len != 0 && !g_rle_encoded.empty()) return; + EnsureDirExists(kBenchDataDir); + if (ReadWholeFile(kRLEV2Path, g_rle_encoded)) { + g_rle_len = g_rle_encoded.size(); + return; + } + EnsureRawData(); + PaxEncoder::EncodingOption enc_opt; + enc_opt.column_encode_type = ColumnEncoding_Kind_RLE_V2; + enc_opt.is_sign = false; + + PaxOrcEncoder rle_encoder(enc_opt); + auto rle_out = std::make_shared<DataBuffer<char>>(g_raw_len); + rle_encoder.SetDataBuffer(rle_out); + // encode directly from raw bytes to avoid depending on g_offsets + size_t count = g_raw_len / sizeof(uint32_t); + const uint32_t *vals = reinterpret_cast<const uint32_t *>(g_raw_bytes.get()); + for (size_t i = 0; i < count; ++i) { + uint32_t v = vals[i]; + rle_encoder.Append(reinterpret_cast<char *>(&v), sizeof(uint32_t)); + } + rle_encoder.Flush(); + + g_rle_len = rle_encoder.GetBufferSize(); + g_rle_encoded.assign(rle_encoder.GetBuffer(), + rle_encoder.GetBuffer() + g_rle_len); + WriteWholeFile(kRLEV2Path, g_rle_encoded.data(), g_rle_len); +} + +// Lazily ensure Delta encoded buffer exists (load or build from raw) +static void EnsureDeltaEncoded() { + if (g_delta_len != 0 && !g_delta_encoded.empty()) return; + EnsureDirExists(kBenchDataDir); + if (ReadWholeFile(kDeltaPath, g_delta_encoded)) { + g_delta_len = g_delta_encoded.size(); + return; + } + EnsureRawData(); + PaxEncoder::EncodingOption enc_opt; + enc_opt.is_sign = false; + // type not used by PaxDeltaEncoder + PaxDeltaEncoder delta_encoder(enc_opt); + auto delta_out = std::make_shared<DataBuffer<char>>(g_raw_len); + delta_encoder.SetDataBuffer(delta_out); + // Encode whole array in one shot + delta_encoder.Append(g_raw_bytes.get(), g_raw_len); + delta_encoder.Flush(); + + g_delta_len = delta_encoder.GetBufferSize(); + g_delta_encoded.assign(delta_encoder.GetBuffer(), + delta_encoder.GetBuffer() + g_delta_len); + WriteWholeFile(kDeltaPath, g_delta_encoded.data(), g_delta_len); +} + +// Lazily ensure ZSTD compressed buffer exists (load or build from raw) +static void EnsureZstdCompressed() { + EnsureDirExists(kBenchDataDir); + if (!g_zstd) { + g_zstd = + PaxCompressor::CreateBlockCompressor(ColumnEncoding_Kind_COMPRESS_ZSTD); + if (!g_zstd) { + std::cerr << "Failed to create ZSTD compressor" << std::endl; + std::abort(); + } + } + if (g_zstd_len != 0 && g_zstd_compressed) return; + if (ReadWholeFile(kZSTDPath, g_zstd_compressed, g_zstd_len)) { + return; + } + EnsureRawData(); + size_t bound = g_zstd->GetCompressBound(g_raw_len); + g_zstd_compressed = std::make_unique<char[]>(bound); + g_zstd_len = g_zstd->Compress(g_zstd_compressed.get(), bound, + g_raw_bytes.get(), g_raw_len, /*lvl=*/5); + if (g_zstd->IsError(g_zstd_len) || g_zstd_len == 0) { + std::cerr << "ZSTD one-time compress failed" << std::endl; + std::abort(); + } + WriteWholeFile(kZSTDPath, g_zstd_compressed.get(), g_zstd_len); +} + +static void PrepareOnce() { + pax::bench::CreateMemoryContext(); + EnsureDirExists(kBenchDataDir); +} + +static void CleanupBenchData() { + const char *files[] = {kRLEV2Path, kDeltaPath, kZSTDPath, kRawPath};
(const char *p : files) { + std::remove(p); + } + + rmdir(kBenchDataDir); +} + +} // namespace + +// Register module init with gbench framework +REGISTER_BENCHMARK_INIT(PrepareOnce); +REGISTER_BENCHMARK_CLEANUP(CleanupBenchData); + +// RLEv2 encode benchmark +static void BM_RLEV2_Encode(::benchmark::State &state) { + // Prepare raw data only; no encoded buffers are created here + EnsureRawData(); + for (auto _ : state) { + PaxEncoder::EncodingOption enc_opt; + enc_opt.column_encode_type = ColumnEncoding_Kind_RLE_V2; + enc_opt.is_sign = false; + + PaxOrcEncoder encoder(enc_opt); + auto out = std::make_shared>(g_raw_len); + encoder.SetDataBuffer(out); + + size_t count = g_raw_len / sizeof(uint32_t); + const uint32_t *vals = + reinterpret_cast(g_raw_bytes.get()); + for (size_t i = 0; i < count; ++i) { + uint32_t v = vals[i]; + encoder.Append(reinterpret_cast(&v), sizeof(uint32_t)); + } + encoder.Flush(); + g_rle_len = encoder.GetBufferSize(); + benchmark::DoNotOptimize(encoder.GetBuffer()); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(g_raw_len)); + state.counters["raw_kb"] = + benchmark::Counter(static_cast(g_raw_len) / (1024.0)); + state.counters["rle_kb"] = + benchmark::Counter(static_cast(g_rle_len) / (1024.0)); +} +BENCHMARK(BM_RLEV2_Encode); + +// RLEv2 decode benchmark +static void BM_RLEV2_Decode(::benchmark::State &state) { + // Ensure we have raw size and encoded buffer ready (prefer from disk) + EnsureRawData(); + EnsureRleEncoded(); + for (auto _ : state) { + PaxDecoder::DecodingOption dec_opt; + dec_opt.column_encode_type = ColumnEncoding_Kind_RLE_V2; + dec_opt.is_sign = false; + + auto decoder = PaxDecoder::CreateDecoder(dec_opt); + auto out = std::make_shared>(g_raw_len); + decoder->SetSrcBuffer(g_rle_encoded.data(), g_rle_len); + decoder->SetDataBuffer(out); + size_t n = decoder->Decoding(); + benchmark::DoNotOptimize(n); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(g_raw_len)); +} +BENCHMARK(BM_RLEV2_Decode); + +// Delta encode benchmark +static void BM_Delta_Encode(::benchmark::State &state) { + EnsureRawData(); + for (auto _ : state) { + PaxEncoder::EncodingOption enc_opt; + enc_opt.is_sign = false; + PaxDeltaEncoder encoder(enc_opt); + auto out = std::make_shared>(g_raw_len); + encoder.SetDataBuffer(out); + encoder.Append(g_raw_bytes.get(), g_raw_len); + encoder.Flush(); + g_delta_len = encoder.GetBufferSize(); + benchmark::DoNotOptimize(encoder.GetBuffer()); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(g_raw_len)); + state.counters["delta_kb"] = + benchmark::Counter(static_cast(g_delta_len) / (1024.0)); +} +BENCHMARK(BM_Delta_Encode); + +// Delta decode benchmark +static void BM_Delta_Decode(::benchmark::State &state) { + EnsureRawData(); + EnsureDeltaEncoded(); + for (auto _ : state) { + PaxDecoder::DecodingOption dec_opt; + dec_opt.is_sign = false; + dec_opt.column_encode_type = ColumnEncoding_Kind_DIRECT_DELTA; + PaxDeltaDecoder decoder(dec_opt); + auto out = std::make_shared>(g_raw_len); + decoder.SetSrcBuffer(g_delta_encoded.data(), g_delta_len); + decoder.SetDataBuffer(out); + size_t n = decoder.Decoding(); + if (n != g_raw_len / sizeof(uint32_t) && out->Used() != g_raw_len) { + std::cerr << "Delta decode failed, n: " << n + << ", g_raw_len: " << g_raw_len + << ", g_delta_len: " << g_delta_len + << ", out: Used: " << out->Used() << std::endl; + std::abort(); + } + + if 
(memcmp(out->GetBuffer(), g_raw_bytes.get(), g_raw_len) != 0) { + std::cerr << "Delta decode failed, out: " << out->GetBuffer() + << ", g_raw_bytes: " << g_raw_bytes.get() << std::endl; + std::abort(); + } + + benchmark::DoNotOptimize(n); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(g_raw_len)); +} +BENCHMARK(BM_Delta_Decode); + +// ZSTD compress benchmark +static void BM_ZSTD_Compress(::benchmark::State &state) { + EnsureRawData(); + if (!g_zstd) { + g_zstd = + PaxCompressor::CreateBlockCompressor(ColumnEncoding_Kind_COMPRESS_ZSTD); + if (!g_zstd) { + std::cerr << "Failed to create ZSTD compressor" << std::endl; + std::abort(); + } + } + size_t bound = g_zstd->GetCompressBound(g_raw_len); + std::unique_ptr dst(new char[bound]); + for (auto _ : state) { + size_t n = g_zstd->Compress(dst.get(), bound, g_raw_bytes.get(), g_raw_len, + /*lvl=*/5); + g_zstd_len = n; + benchmark::DoNotOptimize(n); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(g_raw_len)); + state.counters["zstd_kb"] = + benchmark::Counter(static_cast(g_zstd_len) / (1024.0)); +} +BENCHMARK(BM_ZSTD_Compress); + +// ZSTD decompress benchmark +static void BM_ZSTD_Decompress(::benchmark::State &state) { + EnsureRawData(); + EnsureZstdCompressed(); + std::unique_ptr dst(new char[g_raw_len]); + for (auto _ : state) { + size_t n = g_zstd->Decompress(dst.get(), g_raw_len, g_zstd_compressed.get(), + g_zstd_len); + benchmark::DoNotOptimize(n); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(g_raw_len)); +} +BENCHMARK(BM_ZSTD_Decompress); + +} // namespace pax::bench diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc index 7ba0fcd6768..0e15ec52088 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc @@ -31,6 +31,7 @@ #include "comm/pax_memory.h" #include "storage/columns/pax_dict_encoding.h" #include "storage/columns/pax_rlev2_decoding.h" +#include "storage/columns/pax_delta_encoding.h" namespace pax { @@ -47,7 +48,7 @@ std::shared_ptr PaxDecoder::CreateDecoder(const DecodingOption &deco break; } case ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA: { - /// TODO(jiaqizho) support it + decoder = std::make_shared>(decoder_options); break; } case ColumnEncoding_Kind::ColumnEncoding_Kind_DICTIONARY: { diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc new file mode 100644 index 00000000000..3f4b5341c4a --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc @@ -0,0 +1,511 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
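The new encoder in the file below uses a layout that closely resembles Parquet's DELTA_BINARY_PACKED: per 128-value block, subtract the block's minimum delta, then bit-pack each 32-value mini-block at its own width. As a primer, here is a minimal standalone sketch of that one-block scheme. The 128/4 geometry and LSB-first packing mirror the new file; EncodeOneBlock and its exact output layout are illustrative only, not the pax API.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Toy one-block encoder: [min_delta (LE)] [4 x bit_width] [packed deltas].
    std::vector<uint8_t> EncodeOneBlock(const std::vector<uint32_t> &deltas) {
      const uint32_t kPerMini = 32;  // 128 values / 4 mini-blocks
      uint32_t min_delta = *std::min_element(deltas.begin(), deltas.end());

      std::vector<uint8_t> out;
      for (int i = 0; i < 4; ++i) out.push_back((min_delta >> (8 * i)) & 0xFF);

      uint8_t widths[4] = {0, 0, 0, 0};  // bits needed per mini-block (0 -> 0)
      for (size_t i = 0; i < deltas.size(); ++i) {
        uint32_t v = deltas[i] - min_delta;
        uint8_t bits = 0;
        while (v) { ++bits; v >>= 1; }
        widths[i / kPerMini] = std::max(widths[i / kPerMini], bits);
      }
      out.insert(out.end(), widths, widths + 4);

      uint64_t buf = 0;   // LSB-first bit writer, flushed a byte at a time
      uint32_t nbits = 0;
      for (size_t i = 0; i < deltas.size(); ++i) {
        buf |= static_cast<uint64_t>(deltas[i] - min_delta) << nbits;
        nbits += widths[i / kPerMini];
        while (nbits >= 8) { out.push_back(buf & 0xFF); buf >>= 8; nbits -= 8; }
      }
      if (nbits) out.push_back(buf & 0xFF);  // pad block payload to a byte
      return out;
    }

    int main() {
      // Deltas of 1,2,...,33,56,63,89: thirty-two 1s, then 23, 7, 26.
      std::vector<uint32_t> deltas(32, 1);
      deltas.insert(deltas.end(), {23, 7, 26});
      auto block = EncodeOneBlock(deltas);
      // min_delta = 1, widths = {0,5,0,0}, payload = 2 bytes -> 10 bytes
      std::printf("%zu bytes, payload 0x%02x 0x%02x\n", block.size(),
                  static_cast<unsigned>(block[8]),
                  static_cast<unsigned>(block[9]));
    }

This prints "10 bytes, payload 0xd6 0x64", the same payload bytes the PayloadSizeCalculation unit test below asserts.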
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc
new file mode 100644
index 00000000000..3f4b5341c4a
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc
@@ -0,0 +1,511 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * pax_delta_encoding.cc
+ *
+ * IDENTIFICATION
+ *    contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.cc
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "storage/columns/pax_delta_encoding.h"
+
+#include <algorithm>
+#include <cstring>
+#include <vector>
+
+namespace pax {
+
+// delta bitpack encoder
+template <typename T>
+PaxDeltaEncoder<T>::PaxDeltaEncoder(const EncodingOption &encoder_options)
+    : PaxEncoder(encoder_options) {}
+
+template <typename T>
+void PaxDeltaEncoder<T>::Append(char *data, size_t size) {
+  CBDB_CHECK(!has_append_, cbdb::CException::kExTypeAbort,
+             fmt("PaxDeltaEncoder::Append only support Append Once"));
+  has_append_ = true;
+
+  auto T_data = reinterpret_cast<T *>(data);
+  auto T_data_len = size / sizeof(T);
+  Encode(T_data, T_data_len);
+}
+
+inline uint8_t NumBitsAllowZero(uint32_t value) {
+  if (value == 0) return 0;
+  uint8_t bits = 0;
+  while (value) {
+    bits++;
+    value >>= 1;
+  }
+  return bits;
+}
+
+// Fast bit width calculation (0 -> 0)
+inline static uint8_t FastNumBits(uint32_t v) {
+#if defined(__GNUC__) || defined(__clang__)
+  return v == 0 ? 0 : static_cast<uint8_t>(32 - __builtin_clz(v));
+#else
+  uint8_t bits = 0;
+  while (v) {
+    ++bits;
+    v >>= 1;
+  }
+  return bits;
+#endif
+}
+
+// 64-bit bit writer based on raw pointer (writes to reserved DataBuffer range)
+struct BitWriter64Ptr {
+  uint8_t *out;
+  size_t index;
+  uint64_t bit_buffer;
+  uint32_t bit_count;
+
+  BitWriter64Ptr(uint8_t *p) : out(p), index(0), bit_buffer(0), bit_count(0) {}
+
+  inline void Append(uint32_t value, uint8_t width) {
+    if (width == 0) return;
+    bit_buffer |= (static_cast<uint64_t>(value) << bit_count);
+    bit_count += width;
+    while (bit_count >= 8) {
+      out[index++] = static_cast<uint8_t>(bit_buffer & 0xFF);
+      bit_buffer >>= 8;
+      bit_count -= 8;
+    }
+  }
+
+  inline void FlushToByte() {
+    if (bit_count > 0) {
+      out[index++] = static_cast<uint8_t>(bit_buffer & 0xFF);
+      bit_buffer = 0;
+      bit_count = 0;
+    }
+  }
+};
+
+// 64-bit bit reader based on raw pointer (limited to specified payload bytes)
+struct BitReader64Ptr {
+  const uint8_t *in;
+  size_t size;
+  size_t index;
+  uint64_t bit_buffer;
+  uint32_t bit_count;
+
+  BitReader64Ptr(const uint8_t *p, size_t len)
+      : in(p), size(len), index(0), bit_buffer(0), bit_count(0) {}
+
+  inline void Ensure(uint32_t need_bits) {
+    while (bit_count < need_bits && index < size) {
+      bit_buffer |= (static_cast<uint64_t>(in[index]) << bit_count);
+      ++index;
+      bit_count += 8;
+    }
+  }
+
+  inline uint32_t Read(uint8_t width) {
+    if (width == 0) return 0;
+    Ensure(width);
+    uint32_t result;
+    if (width == 32)
+      result = static_cast<uint32_t>(bit_buffer & 0xFFFFFFFFull);
+    else
+      result = static_cast<uint32_t>(bit_buffer & ((1ull << width) - 1));
+    bit_buffer >>= width;
+    bit_count -= width;
+    return result;
+  }
+
+  inline void AlignToByte() {
+    uint32_t drop = bit_count % 8;
+    if (drop) {
+      bit_buffer >>= drop;
+      bit_count -= drop;
+    }
+  }
+};
+
+/*
+Overall layout:
+  DeltaBlockHeader (struct, fixed-size)
+    - uint32 value_per_block
+    - uint32 values_per_mini_block
+    - uint32 total_count
+  T first_value
+  [Repeated Block until total_count is exhausted]
+    - uint32 min_delta
+    - uint8 bit_widths[mini_blocks_per_block]
+    - uint8 payload[computed from bit_widths]
+      // bit-packed adjusted deltas, mini-block by mini-block
+      // within a block: bits are written LSB-first, end aligned to byte
+*/
+
+template <typename T>
+size_t PaxDeltaEncoder<T>::GetBoundSize(size_t src_len) const {
+  size_t value_count = src_len / sizeof(T);
+  size_t block_count = (value_count + value_per_block_ - 1) / value_per_block_;
+  /* header + first_value + block_count * (min_delta + bit_widths)
+   * + payload, estimated at one byte per value (Encode() grows the
+   * buffer if a block needs more) */
+  return sizeof(DeltaBlockHeader) + sizeof(T) +
+         block_count * (sizeof(uint32) + mini_blocks_per_block_) + value_count;
+}
need_size + : result_buffer_->Capacity() * 0.5; + result_buffer_->ReSize(result_buffer_->Capacity() + inc_size); + } + + // write block header: min_delta + result_buffer_->Write(reinterpret_cast(&min_delta), + sizeof(min_delta)); + result_buffer_->Brush(sizeof(min_delta)); + + // write bit_widths + result_buffer_->Write(reinterpret_cast(bit_widths), + mini_blocks_per_block_); + result_buffer_->Brush(mini_blocks_per_block_); + + uint8_t *payload_ptr = + reinterpret_cast(result_buffer_->GetAvailableBuffer()); + BitWriter64Ptr bw(payload_ptr); + for (uint32_t i = 0; i < mini_blocks_per_block_; ++i) { + uint32_t start = i * values_per_mini_block_; + if (start >= values_in_block) break; + uint32_t end = std::min(start + values_per_mini_block_, values_in_block); + uint8_t w = bit_widths[i]; + if (w == 0) continue; + for (uint32_t j = start; j < end; ++j) { + uint32_t adjusted = deltas[j] - min_delta; + bw.Append(adjusted, w); + } + } + bw.FlushToByte(); + result_buffer_->Brush(payload_bytes); + + values_emitted += values_in_block; + } +} + +template +bool PaxDeltaEncoder::SupportAppendNull() const { + return false; +} + +template +void PaxDeltaEncoder::Flush() { + // do nothing +} + +// Specialized reading of one mini-block and batch writing results +// (BitReader64Ptr) +template +inline void ReadMiniBlockSpecializedPtr(BitReader64Ptr &br, T *out_values, + T ¤t_value, uint32_t count_in_mb, + uint32_t min_delta, uint8_t w) { + switch (w) { + case 0: { + for (uint32_t j = 0; j < count_in_mb; ++j) { + current_value = + static_cast(static_cast(current_value) + min_delta); + out_values[j] = current_value; + } + return; + } + case 8: { + for (uint32_t j = 0; j < count_in_mb; ++j) { + uint32_t adjusted = br.Read(8); + current_value = static_cast(static_cast(current_value) + + adjusted + min_delta); + out_values[j] = current_value; + } + return; + } + case 16: { + for (uint32_t j = 0; j < count_in_mb; ++j) { + uint32_t adjusted = br.Read(16); + current_value = static_cast(static_cast(current_value) + + adjusted + min_delta); + out_values[j] = current_value; + } + return; + } + case 32: { + for (uint32_t j = 0; j < count_in_mb; ++j) { + uint32_t adjusted = br.Read(32); + current_value = static_cast(static_cast(current_value) + + adjusted + min_delta); + out_values[j] = current_value; + } + return; + } + default: { + uint32_t j = 0; + const uint32_t n4 = count_in_mb & ~3u; + for (; j < n4; j += 4) { + uint32_t a0 = br.Read(w); + uint32_t a1 = br.Read(w); + uint32_t a2 = br.Read(w); + uint32_t a3 = br.Read(w); + current_value = static_cast(static_cast(current_value) + + a0 + min_delta); + out_values[j] = current_value; + current_value = static_cast(static_cast(current_value) + + a1 + min_delta); + out_values[j + 1] = current_value; + current_value = static_cast(static_cast(current_value) + + a2 + min_delta); + out_values[j + 2] = current_value; + current_value = static_cast(static_cast(current_value) + + a3 + min_delta); + out_values[j + 3] = current_value; + } + for (; j < count_in_mb; ++j) { + uint32_t a = br.Read(w); + current_value = static_cast(static_cast(current_value) + + a + min_delta); + out_values[j] = current_value; + } + return; + } + } +} + +// Specialized reading of one mini-block and batch writing results +template +PaxDeltaDecoder::PaxDeltaDecoder( + const PaxDecoder::DecodingOption &encoder_options) + : PaxDecoder(encoder_options), + data_buffer_(nullptr), + result_buffer_(nullptr) { + CBDB_CHECK(encoder_options.column_encode_type == + 
ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA, + cbdb::CException::kExTypeAbort, + fmt("PaxDeltaDecoder only support DIRECT_DELTA encoding")); + // TODO: if sign is true, should use zigzag encoding, now use delta encoding + // for offsets in non-fixed columns + CBDB_CHECK(encoder_options.is_sign == false, + cbdb::CException::kExTypeUnImplements, + fmt("PaxDeltaDecoder is not supported for signed data, " + "will support zigzag later")); +} + +template +PaxDecoder *PaxDeltaDecoder::SetSrcBuffer(char *data, size_t data_len) { + if (data) { + data_buffer_ = + std::make_shared>(data, data_len, false, false); + data_buffer_->Brush(data_len); + } + return this; +} + +template +PaxDecoder *PaxDeltaDecoder::SetDataBuffer( + std::shared_ptr> result_buffer) { + result_buffer_ = result_buffer; + return this; +} + +template +const char *PaxDeltaDecoder::GetBuffer() const { + return result_buffer_ ? result_buffer_->GetBuffer() : nullptr; +} + +template +size_t PaxDeltaDecoder::GetBufferSize() const { + return result_buffer_ ? result_buffer_->Used() : 0; +} + +template +size_t PaxDeltaDecoder::Next(const char * /*not_null*/) { + CBDB_RAISE(cbdb::CException::kExTypeUnImplements); +} + +template +size_t PaxDeltaDecoder::Decoding() { + if (!data_buffer_) return 0; + Assert(result_buffer_); + + const uint8_t *p = + reinterpret_cast(data_buffer_->GetBuffer()); + uint32_t remaining = static_cast(data_buffer_->Used()); + + // read header: values_per_block, values_per_mini_block_, total_count, + // first_value + DeltaBlockHeader header; + std::memcpy(&header, p, sizeof(header)); + p += sizeof(header); + remaining -= sizeof(header); + uint32_t values_per_block = header.value_per_block; + uint32_t values_per_mini_block_ = header.values_per_mini_block; + uint32_t total_count = header.total_count; + + T first_value; + std::memcpy(&first_value, p, sizeof(T)); + p += sizeof(T); + remaining -= sizeof(T); + + // reserve output buffer + if (result_buffer_->Capacity() < total_count * sizeof(T)) { + result_buffer_->ReSize(total_count * sizeof(T)); + } + + // write first value + T current_value = static_cast(first_value); + result_buffer_->Write(reinterpret_cast(¤t_value), sizeof(T)); + result_buffer_->Brush(sizeof(T)); + uint32_t decoded = 1; + + const uint32_t mini_blocks_per_block_ = + values_per_block / values_per_mini_block_; + + while (decoded < total_count && remaining > 0) { + uint32_t min_delta; + std::memcpy(&min_delta, p, sizeof(min_delta)); + p += sizeof(min_delta); + remaining -= sizeof(min_delta); + + if (remaining < mini_blocks_per_block_) break; + + uint8_t bit_widths[mini_blocks_per_block_] = {0}; + for (uint32_t i = 0; i < mini_blocks_per_block_; ++i) { + bit_widths[i] = *p++; + --remaining; + } + + uint32_t values_in_block = + std::min(values_per_block, total_count - decoded); + + // read payload: initialize reader with remaining bytes; we'll compute + // consumed + BitReader64Ptr br(p, remaining); + + for (uint32_t i = 0; i < mini_blocks_per_block_ && decoded < total_count; + ++i) { + uint32_t start = i * values_per_mini_block_; + if (start >= values_in_block) break; + uint32_t end = std::min(start + values_per_mini_block_, values_in_block); + uint32_t cnt = end - start; + uint8_t w = bit_widths[i]; + + T *out_base = reinterpret_cast(result_buffer_->GetAvailableBuffer()); + ReadMiniBlockSpecializedPtr(br, out_base, current_value, cnt, + min_delta, w); + result_buffer_->Brush(cnt * sizeof(T)); + decoded += cnt; + } + + br.AlignToByte(); + + size_t consumed = br.index; + p += consumed; + remaining 
-= consumed; + } + + Assert(result_buffer_->Used() == total_count * sizeof(T)); + + return result_buffer_->Used(); +} + +template +size_t PaxDeltaDecoder::Decoding(const char * /*not_null*/, + size_t /*not_null_len*/) { + CBDB_RAISE(cbdb::CException::kExTypeUnImplements); +} + +template class PaxDeltaEncoder; +template class PaxDeltaDecoder; +// Add explicit instantiations for signed integral types used by CreateDecoder +template class PaxDeltaDecoder; +template class PaxDeltaDecoder; +template class PaxDeltaDecoder; +template class PaxDeltaDecoder; + +} // namespace pax \ No newline at end of file diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h new file mode 100644 index 00000000000..7f2251201bf --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h @@ -0,0 +1,135 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
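The decoder above deliberately rejects is_sign == true, and its TODO points at zigzag. For reference, this is the usual zigzag transform that would map small signed deltas to small unsigned ones before bit packing; a self-contained sketch, not part of the patch:

    #include <cstdint>
    #include <cstdio>

    // Zigzag interleaves negatives and positives: 0,-1,1,-2,2 -> 0,1,2,3,4,
    // so small-magnitude signed deltas stay small (few bits) as unsigned.
    uint32_t ZigZagEncode(int32_t n) {
      return (static_cast<uint32_t>(n) << 1) ^ static_cast<uint32_t>(n >> 31);
    }

    int32_t ZigZagDecode(uint32_t z) {
      return static_cast<int32_t>((z >> 1) ^ (0u - (z & 1)));
    }

    int main() {
      for (int32_t v : {0, -1, 1, -2, 2, -64}) {
        uint32_t z = ZigZagEncode(v);
        std::printf("%d -> %u -> %d\n", v, z, ZigZagDecode(z));
      }
    }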
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h
new file mode 100644
index 00000000000..7f2251201bf
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h
@@ -0,0 +1,135 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * pax_delta_encoding.h
+ *
+ * IDENTIFICATION
+ *    contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#pragma once
+
+#include "storage/columns/pax_encoding.h"
+#include "storage/columns/pax_decoding.h"
+#include <vector>
+
+namespace pax {
+
+struct BitReader64 {
+  const uint8_t*& p;
+  uint32_t& remaining;
+  uint64_t bit_buffer = 0;
+  uint32_t bit_count = 0;
+
+  BitReader64(const uint8_t*& ptr, uint32_t& size) : p(ptr), remaining(size) {}
+
+  inline void Ensure(uint32_t need_bits) {
+    while (bit_count < need_bits && remaining > 0) {
+      bit_buffer |= (static_cast<uint64_t>(*p) << bit_count);
+      ++p;
+      --remaining;
+      bit_count += 8;
+    }
+  }
+
+  inline uint32_t Read(uint8_t width) {
+    if (width == 0) return 0;
+    Ensure(width);
+    uint32_t result;
+    if (width == 32) {
+      result = static_cast<uint32_t>(bit_buffer & 0xFFFFFFFFull);
+    } else {
+      result = static_cast<uint32_t>(bit_buffer & ((1ull << width) - 1));
+    }
+    bit_buffer >>= width;
+    bit_count -= width;
+    return result;
+  }
+
+  inline void AlignToByte() {
+    uint32_t drop = bit_count % 8;
+    if (drop) {
+      bit_buffer >>= drop;
+      bit_count -= drop;
+    }
+  }
+};
+
+struct DeltaBlockHeader {
+  uint32_t value_per_block;
+  uint32_t values_per_mini_block;
+  uint32_t total_count;
+};
+
+template <typename T>
+class PaxDeltaEncoder : public PaxEncoder {
+ public:
+  explicit PaxDeltaEncoder(const EncodingOption &encoder_options);
+
+  virtual void Append(char *data, size_t size) override;
+
+  virtual bool SupportAppendNull() const override;
+
+  virtual void Flush() override;
+
+  virtual size_t GetBoundSize(size_t src_len) const override;
+
+ private:
+  void Encode(T *data, size_t size);
+
+ private:
+  static constexpr uint32_t value_per_block_ = 128;
+  static constexpr uint32_t mini_blocks_per_block_ = 4;
+  static constexpr uint32_t values_per_mini_block_ =
+      value_per_block_ / mini_blocks_per_block_;
+
+ private:
+  bool has_append_ = false;
+  // Reusable working buffer to avoid per-block allocations during encoding
+  std::vector<uint32_t> deltas_scratch_;
+};
+
+template <typename T>
+class PaxDeltaDecoder : public PaxDecoder {
+ public:
+  explicit PaxDeltaDecoder(const PaxDecoder::DecodingOption &encoder_options);
+
+  virtual PaxDecoder *SetSrcBuffer(char *data, size_t data_len) override;
+
+  virtual PaxDecoder *SetDataBuffer(
+      std::shared_ptr<DataBuffer<char>> result_buffer) override;
+
+  virtual size_t Next(const char *not_null) override;
+
+  virtual size_t Decoding() override;
+
+  virtual size_t Decoding(const char *not_null, size_t not_null_len) override;
+
+  virtual const char *GetBuffer() const override;
+
+  virtual size_t GetBufferSize() const override;
+
+ private:
+  std::shared_ptr<DataBuffer<char>> data_buffer_;
+  std::shared_ptr<DataBuffer<char>> result_buffer_;
+};
+
+}  // namespace pax
\ No newline at end of file
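DeltaBlockHeader is written and read with raw memcpy, so the format relies on it being trivially copyable and padding-free; a quick standalone sanity check (the struct is restated here only for illustration):

    #include <cstdint>
    #include <type_traits>

    struct DeltaBlockHeader {  // restated from pax_delta_encoding.h above
      uint32_t value_per_block;
      uint32_t values_per_mini_block;
      uint32_t total_count;
    };

    // Three uint32 fields -> a fixed 12-byte prefix, followed by sizeof(T)
    // bytes of first_value and then the per-block sections.
    static_assert(sizeof(DeltaBlockHeader) == 12, "unexpected padding");
    static_assert(std::is_trivially_copyable<DeltaBlockHeader>::value,
                  "header must be safe to memcpy");

    int main() { return 0; }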
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding_test.cc
new file mode 100644
index 00000000000..031563381ee
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding_test.cc
@@ -0,0 +1,339 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * pax_delta_encoding_test.cc
+ *
+ * IDENTIFICATION
+ *    contrib/pax_storage/src/cpp/storage/columns/pax_delta_encoding_test.cc
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "storage/columns/pax_delta_encoding.h"
+
+#include <cstring>
+#include <random>
+
+#include "comm/gtest_wrappers.h"
+#include "pax_gtest_helper.h"
+
+namespace pax {
+
+class PaxDeltaEncodingTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Create encoding options
+    encoding_options_.column_encode_type =
+        ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+    encoding_options_.is_sign = false;
+
+    // Create decoding options
+    decoding_options_.column_encode_type =
+        ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+    decoding_options_.is_sign = false;
+  }
+
+  void TearDown() override {}
+
+  // Fast bit width calculation (0 -> 0)
+  inline uint8_t FastNumBits(uint32_t v) {
+#if defined(__GNUC__) || defined(__clang__)
+    return v == 0 ? 0 : static_cast<uint8_t>(32 - __builtin_clz(v));
+#else
+    uint8_t bits = 0;
+    while (v) {
+      ++bits;
+      v >>= 1;
+    }
+    return bits;
+#endif
+  }
+
+  // Helper function to encode and decode data
+  template <typename T>
+  std::vector<T> EncodeAndDecode(const std::vector<T> &input) {
+    // Create encoder
+    PaxDeltaEncoder<T> encoder(encoding_options_);
+
+    size_t bound_size = encoder.GetBoundSize(input.size() * sizeof(T));
+
+    encoder.SetDataBuffer(std::make_shared<DataBuffer<char>>(bound_size));
+
+    // Encode data
+    encoder.Append(reinterpret_cast<char *>(const_cast<T *>(input.data())),
+                   input.size() * sizeof(T));
+
+    // Get encoded buffer
+    const char *encoded_data = encoder.GetBuffer();
+    size_t encoded_size = encoder.GetBufferSize();
+
+    // Create decoder
+    PaxDeltaDecoder<T> decoder(decoding_options_);
+
+    // Set source buffer
+    decoder.SetSrcBuffer(const_cast<char *>(encoded_data), encoded_size);
+
+    // Create result buffer
+    auto result_buffer =
+        std::make_shared<DataBuffer<char>>(input.size() * sizeof(T));
+    decoder.SetDataBuffer(result_buffer);
+
+    // Decode
+    size_t decoded_size = decoder.Decoding();
+
+    // Convert result back to vector
+    const T *decoded_data = reinterpret_cast<const T *>(decoder.GetBuffer());
+    size_t count = decoded_size / sizeof(T);
+
+    return std::vector<T>(decoded_data, decoded_data + count);
+  }
+
+  PaxEncoder::EncodingOption encoding_options_;
+  PaxDecoder::DecodingOption decoding_options_;
+};
+
+// Test basic functionality
+TEST_F(PaxDeltaEncodingTest, BasicEncodeDecode) {
+  std::vector<uint32_t> input = {1, 2, 3, 4, 5};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test example from documentation - consecutive sequence
+TEST_F(PaxDeltaEncodingTest, ConsecutiveSequence) {
+  std::vector<uint32_t> input = {1, 2, 3, 4, 5};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+
+  // Verify deltas would be [1, 1, 1, 1] with min_delta = 1
+  // and adjusted deltas [0, 0, 0, 0] with bit_width = 0
+}
+
+// Test example from documentation - sequence with variation
+TEST_F(PaxDeltaEncodingTest, SequenceWithVariation) {
+  std::vector<uint32_t> input = {7, 5, 3, 1, 2, 3, 4, 5};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+
+  // Verify deltas would be [-2, -2, -2, 1, 1, 1, 1] with min_delta = -2
+  // Since we cast to uint32, -2 becomes a large positive number
+  // adjusted deltas would be [0, 0, 0, 3, 3, 3, 3] with bit_width = 2
+}
+
+// Test single value
+TEST_F(PaxDeltaEncodingTest, SingleValue) {
+  std::vector<uint32_t> input = {42};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test two values
+TEST_F(PaxDeltaEncodingTest, TwoValues) {
+  std::vector<uint32_t> input = {10, 15};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test large values
+TEST_F(PaxDeltaEncodingTest, LargeValues) {
+  std::vector<uint32_t> input = {1000000, 1000001, 1000002, 1000003};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test values with large deltas
+TEST_F(PaxDeltaEncodingTest, LargeDeltas) {
+  std::vector<uint32_t> input = {1, 1000, 2000, 3000};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test full block (128 values)
+TEST_F(PaxDeltaEncodingTest, FullBlock) {
+  std::vector<uint32_t> input;
+  for (uint32_t i = 0; i < 128; ++i) {
+    input.push_back(i);
+  }
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test multiple blocks
+TEST_F(PaxDeltaEncodingTest, MultipleBlocks) {
+  std::vector<uint32_t> input;
+  for (uint32_t i = 0; i < 250; ++i) {
+    input.push_back(i);
+  }
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test random data
+TEST_F(PaxDeltaEncodingTest, RandomData) {
+  std::mt19937 gen(12345);
+  std::uniform_int_distribution<uint32_t> dis(0, 1000000);
+
+  std::vector<uint32_t> input;
+  for (int i = 0; i < 100; ++i) {
+    input.push_back(dis(gen));
+  }
+
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test payload size calculation
+TEST_F(PaxDeltaEncodingTest, PayloadSizeCalculation) {
+  std::vector<uint32_t> input = {
+      1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
+      19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 56, 63, 89};
+  // Test the specific example: adjusted deltas [0,0,...,0,22,6,25] with
+  // bit_width 0,5,0,0
+
+  PaxDeltaEncoder<uint32_t> encoder(encoding_options_);
+  size_t bound_size = encoder.GetBoundSize(input.size() * sizeof(uint32_t));
+  encoder.SetDataBuffer(std::make_shared<DataBuffer<char>>(bound_size));
+  encoder.Append(reinterpret_cast<char *>(input.data()),
+                 input.size() * sizeof(uint32_t));
+
+  // Verify the encoded data structure manually
+  const char *encoded_data = encoder.GetBuffer();
+  size_t encoded_size = encoder.GetBufferSize();
+
+  EXPECT_GT(encoded_size, 0);
+
+  // Parse the encoded data
+  const uint8_t *p = reinterpret_cast<const uint8_t *>(encoded_data);
+
+  // Read header
+  DeltaBlockHeader header;
+  std::memcpy(&header, p, sizeof(header));
+  p += sizeof(header);
+
+  EXPECT_EQ(header.value_per_block, 128);
+  EXPECT_EQ(header.values_per_mini_block, 32);
+  EXPECT_EQ(header.total_count, input.size());
+
+  // Read first value
+  uint32_t first_value;
+  std::memcpy(&first_value, p, sizeof(first_value));
+  p += sizeof(first_value);
+  EXPECT_EQ(first_value, 1);
+
+  // Read block data
+  uint32_t min_delta;
+  std::memcpy(&min_delta, p, sizeof(min_delta));
+  p += sizeof(min_delta);
+
+  // Read all bit widths
+  uint8_t bit_widths[4];
+  for (int i = 0; i < 4; ++i) {
+    bit_widths[i] = *p++;
+  }
+
+  // bit_widths should be [0, 5, 0, 0]
+  ASSERT_EQ(bit_widths[0], 0);
+  ASSERT_EQ(bit_widths[1], 5);
+  ASSERT_EQ(bit_widths[2], 0);
+  ASSERT_EQ(bit_widths[3], 0);
+
+  // Compute payload size from bit_widths and counts
+  uint32_t values_in_block =
+      input.size() - 1;  // we constructed input with 35 deltas in first block
+  uint64_t total_bits = 0;
+  for (uint32_t i = 0; i < 4; ++i) {
+    uint32_t start = i * 32;
+    if (start >= values_in_block) break;
+    uint32_t end = std::min(start + 32u, values_in_block);
+    uint8_t w = bit_widths[i];
+    total_bits += static_cast<uint64_t>(w) * (end - start);
+  }
+  uint32_t payload_size = static_cast<uint32_t>((total_bits + 7) / 8);
+
+  // For this example, we expect payload_size = 2 bytes
+  EXPECT_EQ(payload_size, 2);
+
+  // Assert payload bitmap is correct
+  uint8_t payload[4];
+  std::memcpy(payload, p, 4);
+  p += 4;
+
+  // payload is packed LSB-first; the values (22, 6, 25) are
+  // [0b10110, 0b00110, 0b11001]
+  EXPECT_EQ(payload[0], 0b11010110);
+  EXPECT_EQ(payload[1], 0b01100100);
+}
+
+// Test bit width calculation helper
+TEST_F(PaxDeltaEncodingTest, BitWidthCalculation) {
+  EXPECT_EQ(FastNumBits(0), 0);
+  EXPECT_EQ(FastNumBits(1), 1);
+  EXPECT_EQ(FastNumBits(2), 2);
+  EXPECT_EQ(FastNumBits(3), 2);
+  EXPECT_EQ(FastNumBits(4), 3);
+  EXPECT_EQ(FastNumBits(7), 3);
+  EXPECT_EQ(FastNumBits(8), 4);
+  EXPECT_EQ(FastNumBits(15), 4);
+  EXPECT_EQ(FastNumBits(16), 5);
+  EXPECT_EQ(FastNumBits(255), 8);
+  EXPECT_EQ(FastNumBits(256), 9);
+}
+
+// Test zero deltas (all same values)
+TEST_F(PaxDeltaEncodingTest, ZeroDeltas) {
+  std::vector<uint32_t> input = {42, 42, 42, 42, 42};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test decreasing sequence (negative deltas)
+TEST_F(PaxDeltaEncodingTest, DecreasingSequence) {
+  std::vector<uint32_t> input = {100, 90, 80, 70, 60};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test mixed pattern
+TEST_F(PaxDeltaEncodingTest, MixedPattern) {
+  std::vector<uint32_t> input = {10, 20, 15, 25, 5, 30, 1, 35};
+  auto output = EncodeAndDecode(input);
+  EXPECT_EQ(input, output);
+}
+
+// Test empty input (edge case)
+TEST_F(PaxDeltaEncodingTest, EmptyInput) {
+  std::vector<uint32_t> input = {};
+  // This should handle gracefully or throw expected exception
+  // For now, let's skip this test until we clarify expected behavior
+}
+
+// Test different data types
+TEST_F(PaxDeltaEncodingTest, DifferentTypes) {
+  // Test int32_t (with non-negative values)
+  std::vector<int32_t> input32 = {1, 2, 3, 4, 5};
+  auto output32 = EncodeAndDecode(input32);
+  EXPECT_EQ(input32, output32);
+}
+
+}  // namespace pax
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
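Two details the tests above lean on can be checked in isolation: decreasing sequences work because deltas are computed in uint32 and wrap mod 2^32, and the payload bytes asserted in PayloadSizeCalculation follow directly from LSB-first packing. A standalone sketch using the same numbers:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Negative deltas via wraparound: for {7, 5, ...} the delta 5 - 7 is
      // 0xFFFFFFFE in uint32; it is also the block minimum, so the adjusted
      // delta is 0, and decode adds min_delta back mod 2^32.
      uint32_t prev = 7, cur = 5;
      uint32_t delta = cur - prev;      // 0xFFFFFFFE, not -2
      uint32_t min_delta = delta;       // smallest delta in this block
      assert(delta - min_delta == 0);   // adjusted delta packs in 0 bits
      assert(prev + min_delta == cur);  // unsigned add wraps back to 5

      // LSB-first packing of adjusted deltas (22, 6, 25) at width 5:
      uint32_t bits = 22u | (6u << 5) | (25u << 10);  // 15 bits -> 2 bytes
      std::printf("0x%02x 0x%02x\n", bits & 0xFF, (bits >> 8) & 0xFF);
      // prints 0xd6 0x64, i.e. 0b11010110 and 0b01100100 as asserted above
      return 0;
    }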
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_dict_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_dict_encoding.h
index e552fa7a55a..38f3ba217db 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_dict_encoding.h
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_dict_encoding.h
@@ -53,6 +53,10 @@ class PaxDictEncoder final : public PaxEncoder {
 
   void Flush() override;
 
+  size_t GetBoundSize(size_t src_len) const override {
+    CBDB_RAISE(cbdb::CException::kExTypeUnImplements);
+  }
+
  private:
   size_t AppendInternal(char *data, size_t len);
 
@@ -89,7 +93,8 @@ class PaxDictDecoder final : public PaxDecoder {
 
   PaxDecoder *SetSrcBuffer(char *data, size_t data_len) override;
 
-  PaxDecoder *SetDataBuffer(std::shared_ptr<DataBuffer<char>> result_buffer) override;
+  PaxDecoder *SetDataBuffer(
+      std::shared_ptr<DataBuffer<char>> result_buffer) override;
 
   const char *GetBuffer() const override;
 
@@ -121,8 +126,8 @@
     buffer = src_buff->GetBuffer();
 
-    index_buffer =
-        std::make_shared<DataBuffer<int32>>((int32 *)buffer, head.indexsz, false, false);
+    index_buffer = std::make_shared<DataBuffer<int32>>(
+        (int32 *)buffer, head.indexsz, false, false);
     index_buffer->BrushAll();
 
     desc_buffer = std::make_shared<DataBuffer<int32>>(
@@ -130,8 +135,8 @@
         false);
     desc_buffer->BrushAll();
 
-    entry_buffer = std::make_shared<DataBuffer<char>>(buffer + head.indexsz, head.dictsz,
-                                                      false, false);
+    entry_buffer = std::make_shared<DataBuffer<char>>(
+        buffer + head.indexsz, head.dictsz, false, false);
     entry_buffer->BrushAll();
 
     return std::make_tuple(index_buffer, entry_buffer, desc_buffer);
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc
index 3a354ceec8d..b11b2b7b6bd 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc
@@ -33,6 +33,7 @@
 #include "comm/pax_memory.h"
 #include "storage/columns/pax_dict_encoding.h"
 #include "storage/columns/pax_rlev2_encoding.h"
+#include "storage/columns/pax_delta_encoding.h"
 
 namespace pax {
 
@@ -56,8 +57,7 @@ std::shared_ptr<PaxEncoder> PaxEncoder::CreateStreamingEncoder(
       break;
     }
     case ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA: {
-      // TODO(jiaqizho): support direct delta encoding
-      // not support yet, then direct return a nullptr(means no encoding)
+      encoder = std::make_shared<PaxDeltaEncoder<uint32>>(encoder_options);
       break;
     }
     case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: {
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h
index 362e68caa13..465c7bf0600 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h
@@ -75,6 +75,8 @@ class PaxEncoder {
 
   virtual size_t GetBufferSize() const;
 
+  virtual size_t GetBoundSize(size_t src_len) const = 0;
+
   /**
    * steaming encoder
    *
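GetBoundSize, added to the PaxEncoder interface just above, lets a caller pre-size the output buffer before a one-shot Append/Flush; only the delta encoder implements it so far (PaxDictEncoder and PaxOrcEncoder raise kExTypeUnImplements). A condensed usage sketch, assuming the pax headers in this patch: raw_ptr/raw_len are hypothetical inputs, and the bool argument mirrors the call in InitOffsetStreamCompressor below.

    // Sketch only (types from pax_encoding.h / pax_delta_encoding.h).
    PaxEncoder::EncodingOption opt;
    opt.column_encode_type =
        ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
    opt.is_sign = false;

    auto encoder = PaxEncoder::CreateStreamingEncoder(opt, false);
    size_t bound = encoder->GetBoundSize(raw_len);  // size estimate in bytes
    encoder->SetDataBuffer(std::make_shared<DataBuffer<char>>(bound));
    encoder->Append(raw_ptr, raw_len);              // delta path: append once
    encoder->Flush();
    // encoder->GetBuffer() / GetBufferSize() now expose the encoded bytes;
    // the estimate is not a hard bound, Encode() grows the buffer if needed.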
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc
index 25b6d2f1d6d..90060050236 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc
@@ -59,21 +59,37 @@ void PaxNonFixedEncodingColumn::InitEncoder() {
 }
 
 void PaxNonFixedEncodingColumn::InitOffsetStreamCompressor() {
-  Assert(encoder_options_.offsets_encode_type !=
-         ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED);
-  offsets_compressor_ = PaxCompressor::CreateBlockCompressor(
-      encoder_options_.offsets_encode_type);
+  Assert(encoder_options_.offsets_encode_type ==
+         ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA);
+  SetOffsetsEncodeType(encoder_options_.offsets_encode_type);
   SetOffsetsCompressLevel(encoder_options_.offsets_compress_level);
+
+  PaxEncoder::EncodingOption opt = encoder_options_;
+  opt.column_encode_type =
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+  opt.is_sign = false;
+  // offsets are fixed-width, do not enable non_fixed streaming restriction
+  offsets_encoder_ = PaxEncoder::CreateStreamingEncoder(opt, false);
 }
 
 void PaxNonFixedEncodingColumn::InitOffsetStreamDecompressor() {
   Assert(decoder_options_.offsets_encode_type !=
          ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED);
-  offsets_compressor_ = PaxCompressor::CreateBlockCompressor(
-      decoder_options_.offsets_encode_type);
   SetOffsetsEncodeType(decoder_options_.offsets_encode_type);
   SetOffsetsCompressLevel(decoder_options_.offsets_compress_level);
+
+  if (decoder_options_.offsets_encode_type ==
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA) {
+    PaxDecoder::DecodingOption temp_opt = decoder_options_;
+    temp_opt.column_encode_type =
+        ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+    temp_opt.is_sign = false;
+    offsets_decoder_ = PaxDecoder::CreateDecoder<int32>(temp_opt);
+  } else {
+    offsets_compressor_ = PaxCompressor::CreateBlockCompressor(
+        decoder_options_.offsets_encode_type);
+  }
 }
 
 void PaxNonFixedEncodingColumn::InitDecoder() {
@@ -169,9 +185,13 @@ void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
 
   auto offsets_decompress = [&]() {
     Assert(!compress_route_);
-    Assert(offsets_compressor_);
+    Assert(offsets_compressor_ || offsets_decoder_);
+
+    if (offsets->Used() == 0) {
+      return;
+    }
 
-    if (offsets->Used() != 0) {
+    if (offsets_compressor_) {
       auto d_size = offsets_compressor_->Decompress(
           PaxNonFixedColumn::offsets_->Start(),
           PaxNonFixedColumn::offsets_->Capacity(), offsets->Start(),
@@ -182,22 +202,36 @@
           fmt("Decompress failed, %s", compressor_->ErrorName(d_size)));
       }
       PaxNonFixedColumn::offsets_->Brush(d_size);
+      return;
+    }
+
+    if (offsets_decoder_) {
+      // Decode offsets with the delta decoder over the int32 offsets stream
+      shared_offsets_data_ = std::make_shared<DataBuffer<char>>(
+          PaxNonFixedColumn::offsets_->Start(),
+          PaxNonFixedColumn::offsets_->Capacity(), false, false);
+      offsets_decoder_->SetDataBuffer(shared_offsets_data_);
+      offsets_decoder_->SetSrcBuffer(offsets->Start(), offsets->Used());
+      offsets_decoder_->Decoding();
+      PaxNonFixedColumn::offsets_->Brush(shared_offsets_data_->Used());
+      return;
     }
   };
 
   exist_decoder = compressor_ || decoder_;
+  bool has_offsets_processor = offsets_compressor_ || offsets_decoder_;
 
-  if (exist_decoder && offsets_compressor_) {
+  if (exist_decoder && has_offsets_processor) {
     data_decompress();
     offsets_decompress();
     PaxNonFixedColumn::estimated_size_ = total_size;
     PaxNonFixedColumn::next_offsets_ = -1;
-  } else if (exist_decoder && !offsets_compressor_) {
+  } else if (exist_decoder && !has_offsets_processor) {
     data_decompress();
     PaxNonFixedColumn::offsets_ = offsets;
     PaxNonFixedColumn::estimated_size_ = total_size;
     PaxNonFixedColumn::next_offsets_ = -1;
-  } else if (!exist_decoder && offsets_compressor_) {
+  } else if (!exist_decoder && has_offsets_processor) {
     PaxNonFixedColumn::data_ = data;
     offsets_decompress();
     PaxNonFixedColumn::estimated_size_ = total_size;
@@ -278,17 +312,17 @@ std::pair<char *, size_t> PaxNonFixedEncodingColumn::GetOffsetBuffer(
     AppendLastOffset();
   }
 
-  if (offsets_compressor_ && compress_route_) {
-    if (shared_offsets_data_) {
-      return std::make_pair(shared_offsets_data_->Start(),
-                            shared_offsets_data_->Used());
-    }
+  if (shared_offsets_data_) {
+    return std::make_pair(shared_offsets_data_->Start(),
+                          shared_offsets_data_->Used());
+  }
 
-    if (PaxNonFixedColumn::offsets_->Used() == 0) {
-      // should never append last offset again
-      return PaxNonFixedColumn::GetOffsetBuffer(false);
-    }
+  if (PaxNonFixedColumn::offsets_->Used() == 0) {
+    // should never append last offset again
+    return PaxNonFixedColumn::GetOffsetBuffer(false);
+  }
 
+  if (offsets_compressor_ && compress_route_) {
     size_t bound_size = offsets_compressor_->GetCompressBound(
         PaxNonFixedColumn::offsets_->Used());
     shared_offsets_data_ = std::make_shared<DataBuffer<char>>(bound_size);
@@ -308,6 +342,20 @@
     return std::make_pair(shared_offsets_data_->Start(),
                           shared_offsets_data_->Used());
   }
 
+  if (offsets_encoder_ && compress_route_) {
+    // For delta encoder, allocate a buffer sized by raw bytes for safety
+    size_t bound_size = offsets_encoder_->GetBoundSize(offsets_->Used());
+    shared_offsets_data_ = std::make_shared<DataBuffer<char>>(bound_size);
+    offsets_encoder_->SetDataBuffer(shared_offsets_data_);
+
+    // Encode entire offsets buffer as a single stream
+    offsets_encoder_->Append(offsets_->Start(), offsets_->Used());
+    offsets_encoder_->Flush();
+
+    return std::make_pair(shared_offsets_data_->Start(),
+                          shared_offsets_data_->Used());
+  }
+
   // no compress or uncompressed
   // should never append last offset again
   return PaxNonFixedColumn::GetOffsetBuffer(false);
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h
index b4e956cfe4a..06b60d02ac2 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h
@@ -83,6 +83,9 @@ class PaxNonFixedEncodingColumn : public PaxNonFixedColumn {
   std::shared_ptr<DataBuffer<char>> shared_data_;
 
   std::shared_ptr<PaxCompressor> offsets_compressor_;
+  // Optional encoder/decoder for offsets stream (alternative to compression)
+  std::shared_ptr<PaxEncoder> offsets_encoder_;
+  std::shared_ptr<PaxDecoder> offsets_decoder_;
   std::shared_ptr<DataBuffer<char>> shared_offsets_data_;
 };
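For the read side, the decoder is obtained through the templated factory rather than constructed directly; a condensed sketch of the wiring the tests below exercise (encoded_ptr, encoded_len, and decoded_cap are hypothetical inputs, and the int32 argument matches the explicit instantiations added in pax_delta_encoding.cc):

    // Sketch only (types from pax_decoding.h / pax_delta_encoding.h).
    PaxDecoder::DecodingOption opt;
    opt.column_encode_type =
        ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
    opt.is_sign = false;

    auto decoder = PaxDecoder::CreateDecoder<int32>(opt);
    decoder->SetSrcBuffer(encoded_ptr, encoded_len);
    decoder->SetDataBuffer(std::make_shared<DataBuffer<char>>(decoded_cap));
    size_t produced = decoder->Decoding();  // returns bytes written out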
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc
index 5fa7fb7153c..b3a7ec59458 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc
@@ -1361,4 +1361,96 @@ TEST_F(PaxEncodingTest, TestEncodingWithAllNULL) {
   ASSERT_EQ(n_read, shared_dst_data->Used());
 }
 
+TEST_F(PaxEncodingTest, TestPaxDeltaEncodingBasic) {
+  std::vector<uint32_t> data_vec{100, 101, 102, 105, 106, 110, 120, 121};
+  auto shared_data = std::make_shared<DataBuffer<char>>(1024);
+  auto shared_dst_data = std::make_shared<DataBuffer<char>>(1024);
+
+  PaxEncoder::EncodingOption encoder_options;
+  encoder_options.column_encode_type =
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+  encoder_options.is_sign = false;
+  auto encoder = PaxEncoder::CreateStreamingEncoder(encoder_options);
+
+  ASSERT_TRUE(encoder);
+  encoder->SetDataBuffer(shared_data);
+  encoder->Append(reinterpret_cast<char *>(data_vec.data()),
+                  data_vec.size() * sizeof(uint32_t));
+  encoder->Flush();
+
+  ASSERT_NE(encoder->GetBuffer(), nullptr);
+  ASSERT_GT(encoder->GetBufferSize(), 0UL);
+
+  PaxDecoder::DecodingOption decoder_options;
+  decoder_options.column_encode_type =
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+  decoder_options.is_sign = false;
+
+  auto decoder = PaxDecoder::CreateDecoder<int32>(decoder_options);
+  ASSERT_TRUE(decoder);
+  decoder->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used());
+
+  decoder->SetDataBuffer(shared_dst_data);
+  decoder->Decoding();
+
+  ASSERT_EQ(shared_dst_data->Used(), data_vec.size() * sizeof(int32));
+
+  auto result_dst_data = std::make_shared<DataBuffer<int32>>(
+      reinterpret_cast<int32 *>(shared_dst_data->Start()),
+      shared_dst_data->Used(), false, false);
+
+  for (size_t i = 0; i < data_vec.size(); ++i) {
+    ASSERT_EQ((*result_dst_data)[i], static_cast<int32>(data_vec[i]));
+  }
+}
+
+TEST_F(PaxEncodingTest, TestPaxDeltaEncodingRoundTripRandom) {
+  const size_t n = 1000;
+  std::vector<uint32_t> data_vec(n);
+  std::mt19937 rng(12345);
+  std::uniform_int_distribution<uint32_t> base_dist(0, 100);
+  std::uniform_int_distribution<uint32_t> step_dist(0, 5);
+
+  data_vec[0] = base_dist(rng);
+  for (size_t i = 1; i < n; ++i) {
+    data_vec[i] = data_vec[i - 1] + step_dist(rng);
+  }
+
+  auto shared_data = std::make_shared<DataBuffer<char>>(n * sizeof(uint32_t));
+  auto shared_dst_data =
+      std::make_shared<DataBuffer<char>>(n * sizeof(uint32_t));
+
+  PaxEncoder::EncodingOption encoder_options;
+  encoder_options.column_encode_type =
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+  encoder_options.is_sign = false;
+  auto encoder = PaxEncoder::CreateStreamingEncoder(encoder_options);
+
+  ASSERT_TRUE(encoder);
+  encoder->SetDataBuffer(shared_data);
+
+  encoder->Append(reinterpret_cast<char *>(data_vec.data()),
+                  data_vec.size() * sizeof(uint32_t));
+  encoder->Flush();
+
+  PaxDecoder::DecodingOption decoder_options;
+  decoder_options.column_encode_type =
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA;
+  decoder_options.is_sign = false;
+
+  auto decoder = PaxDecoder::CreateDecoder<int32>(decoder_options);
+  ASSERT_TRUE(decoder);
+  decoder->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used());
+
+  decoder->SetDataBuffer(shared_dst_data);
+  decoder->Decoding();
+
+  ASSERT_EQ(shared_dst_data->Used(), data_vec.size() * sizeof(int32));
+
+  auto result_dst_data = std::make_shared<DataBuffer<int32>>(
+      reinterpret_cast<int32 *>(shared_dst_data->Start()),
+      shared_dst_data->Used(), false, false);
+
+  for (size_t i = 0; i < data_vec.size(); ++i) {
+    ASSERT_EQ((*result_dst_data)[i], static_cast<int32>(data_vec[i]));
+  }
+}
+
 }  // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h
index 7d021a1f1cf..f2197258b69 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h
@@ -49,6 +49,10 @@ class PaxOrcEncoder final : public PaxEncoder {
 
   void Flush() override;
 
+  size_t GetBoundSize(size_t src_len) const override {
+    CBDB_RAISE(cbdb::CException::kExTypeUnImplements);
+  }
+
  private:
   struct EncoderContext {
     bool is_sign;
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.cc
index aaf514f5926..8f3aafae2c4 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.cc
@@ -348,7 +348,7 @@ void PaxVecNonFixedEncodingColumn::Set(
     PaxVecNonFixedColumn::estimated_size_ = total_size;
     PaxVecNonFixedColumn::next_offsets_ = -1;
   } else {  // (!compressor_ && !offsets_compressor_)
-    PaxVecNonFixedColumn::Set(data, offsets_, total_size, non_null_rows);
+    PaxVecNonFixedColumn::Set(data, offsets, total_size, non_null_rows);
   }
 }
 
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.h b/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.h
index 4362312a5a9..524ddca261a 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.h
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_vec_encoding_column.h
@@ -112,6 +112,9 @@ class PaxVecNonFixedEncodingColumn : public PaxVecNonFixedColumn {
   std::shared_ptr<DataBuffer<char>> shared_data_;
 
   std::shared_ptr<PaxCompressor> offsets_compressor_;
+  // Optional encoder/decoder for offsets stream (alternative to compression)
+  std::shared_ptr<PaxEncoder> offsets_encoder_;
+  std::shared_ptr<PaxDecoder> offsets_decoder_;
   std::shared_ptr<DataBuffer<char>> shared_offsets_data_;
 };
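The filter changes below let the sparse filter consume executor ScanKeys in addition to quals, keeping only BTGreaterEqualStrategyNumber / BTLessEqualStrategyNumber keys for min/max pruning. For reference, such a key is normally built with PostgreSQL's ScanKeyInit; a sketch for "attribute 1 >= 10", where the comparison proc OID is a placeholder that depends on the attribute's actual type:

    // PostgreSQL C API sketch; see access/skey.h and utils/fmgroids.h.
    ScanKeyData key;
    ScanKeyInit(&key,
                1,                            /* attribute number (1-based) */
                BTGreaterEqualStrategyNumber, /* >= */
                F_INT4GE,                     /* int4 ">=" proc, placeholder */
                Int32GetDatum(10));
    // key.sk_flags stays 0 here; keys flagged SK_BLOOM_FILTER are skipped by
    // the new PaxSparseFilter::Initialize() and handled later in SeqNext().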
a/contrib/pax_storage/src/cpp/storage/filter/pax_filter.cc
+++ b/contrib/pax_storage/src/cpp/storage/filter/pax_filter.cc
@@ -44,11 +44,12 @@ namespace pax {
 
 PaxFilter::PaxFilter() : sparse_filter_(nullptr), row_filter_(nullptr) {}
 
 void PaxFilter::InitSparseFilter(Relation relation, List *quals,
+                                 ScanKey key, int nkeys,
                                  bool allow_fallback_to_pg) {
   Assert(!sparse_filter_);
   sparse_filter_ =
       std::make_shared<PaxSparseFilter>(relation, allow_fallback_to_pg);
-  sparse_filter_->Initialize(quals);
+  sparse_filter_->Initialize(quals, key, nkeys);
 }
 
 #ifdef VEC_BUILD
diff --git a/contrib/pax_storage/src/cpp/storage/filter/pax_filter.h b/contrib/pax_storage/src/cpp/storage/filter/pax_filter.h
index 467b841ec89..ebc2fff8538 100644
--- a/contrib/pax_storage/src/cpp/storage/filter/pax_filter.h
+++ b/contrib/pax_storage/src/cpp/storage/filter/pax_filter.h
@@ -50,7 +50,7 @@ class PaxFilter final {
   ~PaxFilter() = default;
 
   // The sparse filter
-  void InitSparseFilter(Relation relation, List *quals,
+  void InitSparseFilter(Relation relation, List *quals, ScanKey key, int nkeys,
                         bool allow_fallback_to_pg = false);
 #ifdef VEC_BUILD
   void InitSparseFilter(
diff --git a/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_filter.h b/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_filter.h
index 504878c4dd2..6efa59a7ff6 100644
--- a/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_filter.h
+++ b/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_filter.h
@@ -65,7 +65,7 @@ class PaxSparseFilter final {
 
   bool ExistsFilterPath() const;
 
-  void Initialize(List *quals);
+  void Initialize(List *quals, ScanKey key, int nkeys);
 
 #ifdef VEC_BUILD
   void Initialize(
@@ -83,6 +83,8 @@
  private:
 #endif
 
+  std::shared_ptr<PFTNode> ProcessScanKey(ScanKey key);
+
   // Used to build the filter tree with the PG quals
   std::shared_ptr<PFTNode> ExprWalker(Expr *expr);
   Expr *ExprFlatVar(Expr *expr);
diff --git a/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_pg_path.cc b/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_pg_path.cc
index 0630db6dc21..3a7bc64f389 100644
--- a/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_pg_path.cc
+++ b/contrib/pax_storage/src/cpp/storage/filter/pax_sparse_pg_path.cc
@@ -36,7 +36,7 @@
 
 namespace pax {
 
-void PaxSparseFilter::Initialize(List *quals) {
+void PaxSparseFilter::Initialize(List *quals, ScanKey key, int nkeys) {
   ListCell *qual_cell;
   std::vector<std::shared_ptr<PFTNode>> fl_nodes; /* first level nodes */
   std::string origin_tree_str;
@@ -44,10 +44,27 @@ void PaxSparseFilter::Initialize(List *quals) {
   // no inited
   Assert(!filter_tree_);
 
-  if (!quals) {
+  if (!quals && nkeys == 0) {
     return;
   }
 
+  // walk the scan keys; only min/max filtering is supported here for now
+  for (int i = 0; i < nkeys; i++) {
+    // TODO: support bloom filter in PaxFilter. For now just skip such keys;
+    // SeqNext() will check the bloom filter in PassByBloomFilter()
+    if (key[i].sk_flags & SK_BLOOM_FILTER) {
+      continue;
+    }
+
+    if (key[i].sk_strategy != BTGreaterEqualStrategyNumber &&
+        key[i].sk_strategy != BTLessEqualStrategyNumber) {
+      continue;
+    }
+
+    std::shared_ptr<PFTNode> fl_node = ProcessScanKey(&key[i]);
+    Assert(fl_node);
+    fl_nodes.emplace_back(std::move(fl_node));
+  }
+
   foreach (qual_cell, quals) {
     Expr *fl_clause = (Expr *)lfirst(qual_cell);
     std::shared_ptr<PFTNode> fl_node = ExprWalker(fl_clause);
@@ -67,6 +84,47 @@
               origin_tree_str.c_str(), DebugString().c_str());
 }
 
+std::shared_ptr<PFTNode> PaxSparseFilter::ProcessScanKey(ScanKey key) {
+  std::shared_ptr<PFTNode> node = nullptr;
+  Assert(key);
+  Assert(!(key->sk_flags & SK_BLOOM_FILTER));
+  Assert(key->sk_strategy == BTGreaterEqualStrategyNumber ||
+         key->sk_strategy == BTLessEqualStrategyNumber);
+  Assert(key->sk_attno > 0 &&
+         key->sk_attno <= RelationGetNumberOfAttributes(rel_));
+
+  AttrNumber attno = key->sk_attno;
+
+  // Build VarNode on the left
+  auto var_node = std::make_shared<VarNode>();
+  var_node->attrno = attno;
+
+  // Build ConstNode on the right from ScanKey
+  auto const_node = std::make_shared<ConstNode>();
+  const_node->const_val = key->sk_argument;
+  const_node->const_type = key->sk_subtype;
+  if (key->sk_flags & SK_ISNULL) {
+    const_node->sk_flags |= SK_ISNULL;
+  }
+
+  // Build OpNode and attach children: (var, const)
+  auto op_node = std::make_shared<OpNode>();
+  op_node->strategy = key->sk_strategy;
+  op_node->collation = key->sk_collation;  // may be InvalidOid; executor will
+                                           // fall back to the attr collation
+
+  // Set operand types
+  Form_pg_attribute attr = TupleDescAttr(RelationGetDescr(rel_), attno - 1);
+  op_node->left_typid = attr->atttypid;
+  op_node->right_typid = key->sk_subtype;
+
+  PFTNode::AppendSubNode(op_node, std::move(var_node));
+  PFTNode::AppendSubNode(op_node, std::move(const_node));
+
+  node = op_node;
+  return node;
+}
+
 Expr *PaxSparseFilter::ExprFlatVar(Expr *clause) {
   Expr *flat_clause = clause;
   if (unlikely(!clause)) {
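For a key like the one sketched earlier, ProcessScanKey above yields a three-node subtree that joins the qual-derived nodes under the filter tree root and is evaluated against per-file min/max statistics; schematically:

    OpNode    { strategy = BTGreaterEqualStrategyNumber,
                collation = sk_collation,
                left_typid = attr->atttypid, right_typid = sk_subtype }
      +-- VarNode   { attrno = sk_attno }
      +-- ConstNode { const_val = sk_argument, const_type = sk_subtype }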
  summary_.block_id = writer_options.block_id;
@@ -300,7 +291,6 @@ void OrcWriter::Flush() {
     new_columns = BuildColumns(column_types_, writer_options_.rel_tuple_desc,
                                writer_options_.encoding_opts,
-                               writer_options_.offsets_encoding_opts,
                                writer_options_.storage_format);
 
     for (size_t i = 0; i < column_types_.size(); ++i) {
diff --git a/contrib/pax_storage/src/cpp/storage/pax.cc b/contrib/pax_storage/src/cpp/storage/pax.cc
index ab10387c76c..c8d29bbb6ce 100644
--- a/contrib/pax_storage/src/cpp/storage/pax.cc
+++ b/contrib/pax_storage/src/cpp/storage/pax.cc
@@ -200,8 +200,6 @@ std::unique_ptr<MicroPartitionWriter> TableWriter::CreateMicroPartitionWriter(
   options.file_name = std::move(file_path);
   options.encoding_opts = GetRelEncodingOptions();
   options.storage_format = GetStorageFormat();
-  options.offsets_encoding_opts = std::make_pair(
-      PAX_OFFSETS_DEFAULT_COMPRESSTYPE, PAX_OFFSETS_DEFAULT_COMPRESSLEVEL);
   options.enable_min_max_col_idxs = GetMinMaxColumnIndexes();
   options.enable_bf_col_idxs = GetBloomFilterColumnIndexes();
@@ -261,8 +259,8 @@ void TableWriter::InitOptionsCaches() {
 }
 
 void TableWriter::Open() {
-  rel_path_ = cbdb::BuildPaxDirectoryPath(
-      relation_->rd_node, relation_->rd_backend);
+  rel_path_ =
+      cbdb::BuildPaxDirectoryPath(relation_->rd_node, relation_->rd_backend);
 
   InitOptionsCaches();
@@ -509,8 +507,8 @@ void TableReader::OpenFile() {
 
   if (it.GetExistToast()) {
     // the file must exist on disk
-    toast_file = file_system_->Open(it.GetFileName() + TOAST_FILE_SUFFIX,
-                                    fs::kReadMode);
+    toast_file =
+        file_system_->Open(it.GetFileName() + TOAST_FILE_SUFFIX, fs::kReadMode);
   }
 
   reader_ = MicroPartitionFileFactory::CreateMicroPartitionReader(
@@ -588,8 +586,7 @@ void TableDeleter::DeleteWithVisibilityMap(
   std::unique_ptr<Bitmap8> visi_bitmap;
   auto catalog_update = pax::PaxCatalogUpdater::Begin(rel_);
-  auto rel_path = cbdb::BuildPaxDirectoryPath(
-      rel_->rd_node, rel_->rd_backend);
+  auto rel_path = cbdb::BuildPaxDirectoryPath(rel_->rd_node, rel_->rd_backend);
 
   min_max_col_idxs = cbdb::GetMinMaxColumnIndexes(rel_);
   stats_updater_projection->SetColumnProjection(min_max_col_idxs,
@@ -662,11 +659,10 @@ void TableDeleter::DeleteWithVisibilityMap(
   // TODO: update stats and visimap all in one catalog update
   // Update the stats in pax aux table
   // Note that PAX won't update the stats in group
-  UpdateStatsInAuxTable(catalog_update, micro_partition_metadata,
-                        std::make_shared<Bitmap8>(visi_bitmap->Raw()),
-                        min_max_col_idxs,
-                        cbdb::GetBloomFilterColumnIndexes(rel_),
-                        stats_updater_projection);
+  UpdateStatsInAuxTable(
+      catalog_update, micro_partition_metadata,
+      std::make_shared<Bitmap8>(visi_bitmap->Raw()), min_max_col_idxs,
+      cbdb::GetBloomFilterColumnIndexes(rel_), stats_updater_projection);
 
   // write pg_pax_blocks_oid catalog
   catalog_update.UpdateVisimap(block_id, visimap_file_name);
diff --git a/contrib/pax_storage/src/cpp/storage/pax_defined.h b/contrib/pax_storage/src/cpp/storage/pax_defined.h
index b4ce1115af8..5315797ea3a 100644
--- a/contrib/pax_storage/src/cpp/storage/pax_defined.h
+++ b/contrib/pax_storage/src/cpp/storage/pax_defined.h
@@ -39,7 +39,7 @@ namespace pax {
 #define BITS_TO_BYTES(bits) (((bits) + 7) / 8)
 
 #define PAX_OFFSETS_DEFAULT_COMPRESSTYPE \
-  ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD
+  ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA
 #define PAX_OFFSETS_DEFAULT_COMPRESSLEVEL 5
 
 #define COLUMN_STORAGE_FORMAT_IS_VEC(column) \
diff --git a/contrib/pax_storage/src/cpp/storage/proto/pax.proto b/contrib/pax_storage/src/cpp/storage/proto/pax.proto
index 3e25710027d..765d3e0f8a5 100644
--- a/contrib/pax_storage/src/cpp/storage/proto/pax.proto
+++ b/contrib/pax_storage/src/cpp/storage/proto/pax.proto
@@ -37,6 +37,7 @@ message ColumnEncoding {
     COMPRESS_ZLIB = 4;  // use ZLIB to compress
     DICTIONARY = 5;     // use dict-encoding
+    COMPRESS_LZ4 = 6;   // use lz4 to compress
   }
 
   optional Kind kind = 1;
diff --git a/contrib/pax_storage/src/test/isolation2/input/autovacuum-analyze.source b/contrib/pax_storage/src/test/isolation2/input/autovacuum-analyze.source
index 32e79cdd491..19187b107c2 100644
--- a/contrib/pax_storage/src/test/isolation2/input/autovacuum-analyze.source
+++ b/contrib/pax_storage/src/test/isolation2/input/autovacuum-analyze.source
@@ -178,7 +178,7 @@ SELECT count(*) FROM pg_statistic where starelid = 'autostatstbl'::regclass;
 select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass;
 -- expect analyze_count = 1, autoanalyze_count = 0, and n_mod_since_analyze = 1000 since ANALYZE executed
 -- in same transaction for the insert statement.
-select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl';
+select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl';
 
 -- Wait until autovacuum is triggered
 SELECT gp_wait_until_triggered_fault('auto_vac_worker_after_report_activity', 1, 1);
@@ -192,7 +192,7 @@ SELECT gp_inject_fault('analyze_finished_one_relation', 'reset', 1);
 -- we can see the auto-ANALYZE finished
 select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass;
 -- expect analyze_count = 1, autoanalyze_count = 1, and n_mod_since_analyze = 0
-select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl';
+select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl';
 
 -- Case 2
 --
 -- with auto_stats 'on_change' mode, the auto-ANALYZE still triggers
@@ -211,7 +211,7 @@ SELECT gp_inject_fault('auto_vac_worker_after_report_activity', 'suspend', '', '
 select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass;
 -- expect analyze_count = 2, autoanalyze_count = 1, and n_mod_since_analyze = 1000 since ANALYZE executed
 -- in same transaction for the insert statement.
-select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; -- Wait until autovacuum is triggered SELECT gp_wait_until_triggered_fault('auto_vac_worker_after_report_activity', 1, 1); @@ -225,7 +225,7 @@ SELECT gp_inject_fault('analyze_finished_one_relation', 'reset', 1); -- we can see the auto-ANALYZE finished, check statistic and analyze count select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; -- expect analyze_count = 2, autoanalyze_count = 2, and n_mod_since_analyze = 0 -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; -- Case 3 -- @@ -244,7 +244,7 @@ SELECT gp_inject_fault('auto_vac_worker_after_report_activity', 'suspend', '', ' select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; -- expect analyze_count = 2, autoanalyze_count = 2, and n_mod_since_analyze = 1000 since ANALYZE executed -- in same transaction for the insert statement. -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; -- Wait until autovacuum is triggered SELECT gp_wait_until_triggered_fault('auto_vac_worker_after_report_activity', 1, 1); @@ -258,7 +258,7 @@ SELECT gp_inject_fault('analyze_finished_one_relation', 'reset', 1); -- we can see the auto-ANALYZE finished, check statistic and analyze count select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; -- expect analyze_count = 2, autoanalyze_count = 3, and n_mod_since_analyze = 0 since ANALYZE executed -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; -- Reset GUCs. ALTER SYSTEM RESET autovacuum_naptime; diff --git a/contrib/pax_storage/src/test/isolation2/output/autovacuum-analyze.source b/contrib/pax_storage/src/test/isolation2/output/autovacuum-analyze.source index 0c4b87746e4..dff55111b64 100644 --- a/contrib/pax_storage/src/test/isolation2/output/autovacuum-analyze.source +++ b/contrib/pax_storage/src/test/isolation2/output/autovacuum-analyze.source @@ -422,7 +422,7 @@ select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; (1 row) -- expect analyze_count = 1, autoanalyze_count = 0, and n_mod_since_analyze = 1000 since ANALYZE executed -- in same transaction for the insert statement. 
-select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; analyze_count | autoanalyze_count | n_mod_since_analyze ---------------+-------------------+--------------------- 1 | 0 | 1000 @@ -465,7 +465,7 @@ select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; 1 | 1000 (1 row) -- expect analyze_count = 1, autoanalyze_count = 1, and n_mod_since_analyze = 0 -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; analyze_count | autoanalyze_count | n_mod_since_analyze ---------------+-------------------+--------------------- 1 | 1 | 0 @@ -502,7 +502,7 @@ select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; (1 row) -- expect analyze_count = 2, autoanalyze_count = 1, and n_mod_since_analyze = 1000 since ANALYZE executed -- in same transaction for the insert statement. -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; analyze_count | autoanalyze_count | n_mod_since_analyze ---------------+-------------------+--------------------- 2 | 1 | 1000 @@ -545,7 +545,7 @@ select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; 1 | 2000 (1 row) -- expect analyze_count = 2, autoanalyze_count = 2, and n_mod_since_analyze = 0 -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; analyze_count | autoanalyze_count | n_mod_since_analyze ---------------+-------------------+--------------------- 2 | 2 | 0 @@ -581,7 +581,7 @@ select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; (1 row) -- expect analyze_count = 2, autoanalyze_count = 2, and n_mod_since_analyze = 1000 since ANALYZE executed -- in same transaction for the insert statement. 
-select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; analyze_count | autoanalyze_count | n_mod_since_analyze ---------------+-------------------+--------------------- 2 | 2 | 1000 @@ -624,7 +624,7 @@ select relpages, reltuples from pg_class where oid = 'autostatstbl'::regclass; 1 | 3000 (1 row) -- expect analyze_count = 2, autoanalyze_count = 3, and n_mod_since_analyze = 0 since ANALYZE executed -select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables_internal where relname = 'autostatstbl'; +select analyze_count, autoanalyze_count, n_mod_since_analyze from pg_stat_all_tables where relname = 'autostatstbl'; analyze_count | autoanalyze_count | n_mod_since_analyze ---------------+-------------------+--------------------- 2 | 3 | 0 diff --git a/contrib/pax_storage/src/test/regress/expected/gp_toolkit.out b/contrib/pax_storage/src/test/regress/expected/gp_toolkit.out index 336354081af..745db42283a 100644 --- a/contrib/pax_storage/src/test/regress/expected/gp_toolkit.out +++ b/contrib/pax_storage/src/test/regress/expected/gp_toolkit.out @@ -304,7 +304,7 @@ update pg_statistic set stawidth=2034567890 where starelid = 'wide_width_test':: select btdrelpages, btdexppages from gp_toolkit.gp_bloat_expected_pages where btdrelid='wide_width_test'::regclass; btdrelpages | btdexppages -------------+------------- - 4 | 3104504228 + 1 | 3104504228 (1 row) select * from gp_toolkit.gp_bloat_diag WHERE bdinspname <> 'pg_catalog'; diff --git a/contrib/pax_storage/src/test/regress/expected/pg_stat.out b/contrib/pax_storage/src/test/regress/expected/pg_stat.out index 27dc25f957d..761372051ef 100644 --- a/contrib/pax_storage/src/test/regress/expected/pg_stat.out +++ b/contrib/pax_storage/src/test/regress/expected/pg_stat.out @@ -5,7 +5,7 @@ create table pg_stat_test(a int); select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup -from pg_stat_all_tables where relname = 'pg_stat_test'; +from gp_stat_all_tables_summary where relname = 'pg_stat_test'; schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup ------------+--------------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------ public | pg_stat_test | 0 | 0 | | | 0 | 0 | 0 | 0 | 0 | 0 @@ -14,7 +14,7 @@ from pg_stat_all_tables where relname = 'pg_stat_test'; select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup -from pg_stat_user_tables where relname = 'pg_stat_test'; +from gp_stat_user_tables_summary where relname = 'pg_stat_test'; schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup ------------+--------------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------ public | pg_stat_test | 0 | 0 | | | 0 | 0 | 0 | 0 | 0 | 0 @@ -22,14 +22,14 @@ from pg_stat_user_tables where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_all_indexes where relname = 
'pg_stat_test'; +from gp_stat_all_indexes_summary where relname = 'pg_stat_test'; schemaname | relname | indexrelname | idx_scan | idx_tup_read | idx_tup_fetch ------------+---------+--------------+----------+--------------+--------------- (0 rows) select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_user_indexes where relname = 'pg_stat_test'; +from gp_stat_user_indexes_summary where relname = 'pg_stat_test'; schemaname | relname | indexrelname | idx_scan | idx_tup_read | idx_tup_fetch ------------+---------+--------------+----------+--------------+--------------- (0 rows) @@ -63,7 +63,7 @@ reset enable_seqscan; select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze -from pg_stat_all_tables where relname = 'pg_stat_test'; +from gp_stat_all_tables_summary where relname = 'pg_stat_test'; schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze ------------+--------------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------+--------------------- public | pg_stat_test | 15 | 391 | 1 | 0 | 110 | 0 | 19 | 0 | 91 | 19 | 129 @@ -72,7 +72,7 @@ from pg_stat_all_tables where relname = 'pg_stat_test'; select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze -from pg_stat_user_tables where relname = 'pg_stat_test'; +from gp_stat_user_tables_summary where relname = 'pg_stat_test'; schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze ------------+--------------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------+--------------------- public | pg_stat_test | 15 | 391 | 1 | 0 | 110 | 0 | 19 | 0 | 91 | 19 | 129 @@ -80,7 +80,7 @@ from pg_stat_user_tables where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_all_indexes where relname = 'pg_stat_test'; +from gp_stat_all_indexes_summary where relname = 'pg_stat_test'; schemaname | relname | indexrelname | idx_scan | idx_tup_read | idx_tup_fetch ------------+--------------+--------------------------+----------+--------------+--------------- public | pg_stat_test | pg_stat_user_table_index | 1 | 1 | 0 @@ -88,7 +88,7 @@ from pg_stat_all_indexes where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_user_indexes where relname = 'pg_stat_test'; +from gp_stat_user_indexes_summary where relname = 'pg_stat_test'; schemaname | relname | indexrelname | idx_scan | idx_tup_read | idx_tup_fetch ------------+--------------+--------------------------+----------+--------------+--------------- public | pg_stat_test | pg_stat_user_table_index | 1 | 1 | 0 diff --git a/contrib/pax_storage/src/test/regress/expected/stats.out b/contrib/pax_storage/src/test/regress/expected/stats.out index d3f407656fc..a75f9801a36 100644 --- a/contrib/pax_storage/src/test/regress/expected/stats.out +++ b/contrib/pax_storage/src/test/regress/expected/stats.out @@ -22,7 +22,7 @@ SELECT t.seq_scan, 
t.seq_tup_read, t.idx_scan, t.idx_tup_fetch, (b.heap_blks_read + b.heap_blks_hit) AS heap_blks, (b.idx_blks_read + b.idx_blks_hit) AS idx_blks, pg_stat_get_snapshot_timestamp() as snap_ts - FROM pg_catalog.pg_stat_user_tables AS t, + FROM pg_catalog.gp_stat_user_tables_summary AS t, pg_catalog.pg_statio_user_tables AS b WHERE t.relname='tenk2' AND b.relname='tenk2'; -- function to wait for counters to advance @@ -45,17 +45,17 @@ begin -- check to see if seqscan has been sensed SELECT (st.seq_scan >= pr.seq_scan + 1) INTO updated1 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; -- check to see if indexscan has been sensed SELECT (st.idx_scan >= pr.idx_scan + 1) INTO updated2 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; -- check to see if all updates have been sensed SELECT (n_tup_ins > 0) INTO updated3 - FROM pg_stat_user_tables WHERE relname='trunc_stats_test4'; + FROM gp_stat_user_tables_summary WHERE relname='trunc_stats_test4'; -- We must also check explicitly that pg_stat_get_snapshot_timestamp has -- advanced, because that comes from the global stats file which might @@ -65,7 +65,7 @@ begin -- check to see if idx_tup_fetch has been sensed SELECT (st.idx_tup_fetch >= pr.idx_tup_fetch + 1) INTO updated5 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; exit when updated1 and updated2 and updated3 and updated4 and updated5; @@ -177,7 +177,7 @@ SELECT wait_for_stats(); -- check effects SELECT relname, n_tup_ins, n_tup_upd, n_tup_del, n_live_tup, n_dead_tup - FROM pg_stat_user_tables + FROM gp_stat_user_tables_summary WHERE relname like 'trunc_stats_test%' order by relname; relname | n_tup_ins | n_tup_upd | n_tup_del | n_live_tup | n_dead_tup -------------------+-----------+-----------+-----------+------------+------------ @@ -192,7 +192,7 @@ SELECT st.seq_scan >= pr.seq_scan + 1, st.seq_tup_read >= pr.seq_tup_read + cl.reltuples, st.idx_scan >= pr.idx_scan + 1, st.idx_tup_fetch >= pr.idx_tup_fetch + 1 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; ?column? | ?column? | ?column? | ?column? ----------+----------+----------+---------- diff --git a/contrib/pax_storage/src/test/regress/input/pgstat_qd_tabstat.source b/contrib/pax_storage/src/test/regress/input/pgstat_qd_tabstat.source index a3c201de66c..4be0a8c7eb8 100644 --- a/contrib/pax_storage/src/test/regress/input/pgstat_qd_tabstat.source +++ b/contrib/pax_storage/src/test/regress/input/pgstat_qd_tabstat.source @@ -10,7 +10,7 @@ copy table_for_docopy (i, j) from stdin; 3 hello3 \. select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_docopy'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_docopy'::regclass; CREATE TABLE data_tbl (a int,b char) distributed by (a); INSERT INTO data_tbl values(1,'1'); @@ -21,7 +21,7 @@ COPY data_tbl TO '/tmp/data_tbl.csv' on segment; create table copy_on_segment (a int,b char); COPY copy_on_segment from '/tmp/data_tbl.csv' on segment log errors segment reject limit 3 rows; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'copy_on_segment'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'copy_on_segment'::regclass; -- Test pgstat table stat in initplan on QD @@ -34,26 +34,26 @@ copy table_for_initplan (i, j, k) from stdin; explain (costs off) with updated AS (update table_for_initplan set k = 33 where i = 3 returning k) select table_for_initplan.*, (select sum(k) from updated) from table_for_initplan; with updated AS (update table_for_initplan set k = 33 where i = 3 returning k) select table_for_initplan.*, (select sum(k) from updated) from table_for_initplan; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_initplan'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_initplan'::regclass; -- Test pgstat table stat in CTAS on QD create table table_for_ctas as select i, 'hello' || i from generate_series(1, 100) f(i); select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_ctas'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_ctas'::regclass; select i, 'hello' || i into table_for_insert_into from generate_series(1, 100) f(i); select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_insert_into'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_insert_into'::regclass; -- Test pgstat table stat in ALTER TABLE SET DISTRIBUTED BY on QD create table table_for_set_distributed_by(i int, j varchar) distributed by (i); insert into table_for_set_distributed_by select i, 'hello' || i from generate_series(1, 333) f(i); select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_set_distributed_by'::regclass;
+select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_set_distributed_by'::regclass;
 
 alter table table_for_set_distributed_by set distributed by (j);
 select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat.
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_set_distributed_by'::regclass;
+select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_set_distributed_by'::regclass;
 
 -- Test pgstat table stat in execution of function on QD
@@ -68,7 +68,7 @@ $$ language plpgsql volatile;
 select update_table_for_function();
 select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat.
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_function'::regclass;
+select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_function'::regclass;
 
 -- Test pgstat table stat in ALTER TABLE EXPAND TABLE on QD;
@@ -78,11 +78,11 @@ create table table_for_expand(i int, j varchar) distributed by (i);
 insert into table_for_expand select i, 'hello' || i from generate_series(1, 333) f(i);
 select count(distinct gp_segment_id) from table_for_expand;
 select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat.
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_expand'::regclass;
+select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_expand'::regclass;
 alter table table_for_expand expand table;
 select count(distinct gp_segment_id) from table_for_expand;
 select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat.
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_expand'::regclass;
+select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_expand'::regclass;
 
 select gp_debug_reset_create_table_default_numsegments();
@@ -103,7 +103,7 @@ update table_for_iud set j = 'heroes never die' where i >= 300;
 release savepoint level3;
 commit;
 select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat.
-select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_iud'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_iud'::regclass; -- Test pgstat table stat in TRUNCATE on QD create table table_for_truncate(i int, j varchar) distributed by (i); insert into table_for_truncate select i, 'hello' || i from generate_series(1, 777) f(i); select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_truncate'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_truncate'::regclass; begin; savepoint level1; savepoint level2; @@ -141,12 +141,12 @@ delete from table_for_truncate where i >= 700; update table_for_truncate set j = 'D' where i <= 200; commit; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_truncate'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_truncate'::regclass; create table table_for_truncate_abort(i int, j varchar) distributed by (i); insert into table_for_truncate_abort select i, 'hello' || i from generate_series(1, 777) f(i); select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_truncate_abort'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_truncate_abort'::regclass; begin; savepoint level1; savepoint level2; @@ -160,7 +160,7 @@ delete from table_for_truncate_abort where i < 700; update table_for_truncate_abort set j = 'D' where i >= 200; rollback; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_truncate_abort'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_truncate_abort'::regclass; -- Test pgstat table stat for partition table on QD @@ -171,17 +171,17 @@ PARTITION BY RANGE (rank) DEFAULT PARTITION extra ); insert into rankpart select i, i % 10, i from generate_series(1, 1000)i; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; begin; delete from rankpart where id <= 100; rollback; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; copy rankpart (id, rank, product) from stdin; 1001 1 1001 @@ -196,8 +196,8 @@ copy rankpart (id, rank, product) from stdin; 1010 6 1010 \. select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; begin; update rankpart set rank = 1 where id > 1005; @@ -209,9 +209,9 @@ release savepoint level2; rollback to savepoint level1; commit; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; begin; savepoint level1_1; @@ -219,9 +219,9 @@ insert into rankpart select i, i % 10, i from generate_series(2001, 3000)i; insert into rankpart select i, i % 10, i from generate_series(3001, 4000)i; commit; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; -- Test pgstat matview stat with distributed policy. @@ -229,13 +229,13 @@ create table base_table(i int, j int, z int ) distributed by (i); insert into base_table select i,i,i from generate_series(1, 100) i; create materialized view mt as select * from base_table where z>=50; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; insert into base_table select i,i,i from generate_series(1, 100) i; refresh materialized view mt; select pg_sleep(0.77) from gp_dist_random('gp_id'); -- Force pgstat_report_stat() to send tabstat. 
-select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; --- pg_stat_all_tables collects gpstats across segments select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; +-- gp_stat_all_tables_summary collects gpstats across segments +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from gp_stat_all_tables_summary where relid = 'mt'::regclass; drop materialized view mt; drop table base_table; @@ -245,13 +245,13 @@ create table base_table(i int, j int, z int ) distributed replicated; insert into base_table select i,i,i from generate_series(1, 100) i; create materialized view mt as select * from base_table where z>=50 distributed replicated; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; insert into base_table select i,i,i from generate_series(1, 100) i; refresh materialized view mt; select pg_sleep(0.77) from gp_dist_random('gp_id'); -- Force pgstat_report_stat() to send tabstat. -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; --- pg_stat_all_tables collects gpstats across segments select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; +-- gp_stat_all_tables_summary collects gpstats across segments +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from gp_stat_all_tables_summary where relid = 'mt'::regclass; reset gp_autostats_mode; @@ -263,15 +263,15 @@ insert into tabstat_ao select 1,1; delete from tabstat_ao; select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. -select count(*) from pg_stat_all_tables +select count(*) from gp_stat_all_tables_summary where relid = (select segrelid from pg_appendonly where relid = 'tabstat_ao'::regclass) OR relid = (select blkdirrelid from pg_appendonly where relid = 'tabstat_ao'::regclass) OR relid = (select visimaprelid from pg_appendonly where relid = 'tabstat_ao'::regclass); select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
-select n_tup_ins from pg_stat_all_tables where relid = (select segrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); -select n_tup_ins from pg_stat_all_tables where relid = (select blkdirrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); -select n_tup_ins from pg_stat_all_tables where relid = (select visimaprelid from pg_appendonly where relid = 'tabstat_ao'::regclass); +select n_tup_ins from gp_stat_all_tables_summary where relid = (select segrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); +select n_tup_ins from gp_stat_all_tables_summary where relid = (select blkdirrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); +select n_tup_ins from gp_stat_all_tables_summary where relid = (select visimaprelid from pg_appendonly where relid = 'tabstat_ao'::regclass); drop table tabstat_ao; diff --git a/contrib/pax_storage/src/test/regress/output/pgstat_qd_tabstat.source b/contrib/pax_storage/src/test/regress/output/pgstat_qd_tabstat.source index 5118839b171..a0b24ee35dc 100644 --- a/contrib/pax_storage/src/test/regress/output/pgstat_qd_tabstat.source +++ b/contrib/pax_storage/src/test/regress/output/pgstat_qd_tabstat.source @@ -10,7 +10,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_docopy'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_docopy'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 3 | 0 | 0 | 0 | 3 | 0 | 3 @@ -32,7 +32,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'copy_on_segment'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'copy_on_segment'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 4 | 0 | 0 | 0 | 4 | 0 | 4 @@ -70,7 +70,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_initplan'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_initplan'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 3 | 1 | 0 | 0 | 3 | 1 | 4 @@ -86,7 +86,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_ctas'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_ctas'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 100 | 0 | 0 | 0 | 100 | 0 | 100 @@ -101,7 +101,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_insert_into'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_insert_into'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 100 | 0 | 0 | 0 | 100 | 0 | 100 @@ -116,7 +116,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_set_distributed_by'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_set_distributed_by'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 333 | 0 | 0 | 0 | 333 | 0 | 333 @@ -129,7 +129,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_set_distributed_by'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_set_distributed_by'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 333 | 0 | 0 | 0 | 333 | 0 | 333 @@ -157,7 +157,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_function'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_function'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 333 | 0 | 200 | 0 | 133 | 200 | 533 @@ -185,7 +185,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_expand'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_expand'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 333 | 0 | 0 | 0 | 333 | 0 | 333 @@ -204,7 +204,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_expand'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_expand'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 333 | 0 | 0 | 0 | 333 | 0 | 333 @@ -238,7 +238,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_iud'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_iud'::regclass; n_live_tup | n_dead_tup ------------+------------ 333 | 34 @@ -264,7 +264,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_iud'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_iud'::regclass; n_live_tup | n_dead_tup ------------+------------ 133 | 713 @@ -279,7 +279,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'table_for_truncate'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'table_for_truncate'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 777 | 0 | 0 | 0 | 777 | 0 | 777 @@ -303,7 +303,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_truncate'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_truncate'::regclass; n_live_tup | n_dead_tup ------------+------------ 699 | 301 @@ -317,7 +317,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_truncate_abort'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_truncate_abort'::regclass; n_live_tup | n_dead_tup ------------+------------ 777 | 0 @@ -341,7 +341,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_live_tup, n_dead_tup from pg_stat_all_tables_internal where relid = 'table_for_truncate_abort'::regclass; +select n_live_tup, n_dead_tup from pg_stat_all_tables where relid = 'table_for_truncate_abort'::regclass; n_live_tup | n_dead_tup ------------+------------ 777 | 223 @@ -360,19 +360,19 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 500 | 0 | 0 | 0 | 500 | 0 | 500 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 400 | 0 | 0 | 0 | 400 | 0 | 400 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 100 | 0 | 0 | 0 | 100 | 0 | 100 @@ -387,19 +387,19 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 500 | 0 | 50 | 0 | 500 | 0 | 500 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 400 | 0 | 40 | 0 | 400 | 0 | 400 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 100 | 0 | 10 | 0 | 100 | 0 | 100 @@ -412,13 +412,13 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 505 | 0 | 50 | 0 | 505 | 0 | 505 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 405 | 0 | 40 | 0 | 405 | 0 | 405 @@ -439,19 +439,19 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 510 | 0 | 55 | 0 | 510 | 0 | 510 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 410 | 0 | 50 | 0 | 400 | 10 | 410 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 100 | 0 | 10 | 0 | 100 | 0 | 100 @@ -468,19 +468,19 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_2'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_2'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 1510 | 0 | 55 | 0 | 1510 | 0 | 1510 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_3'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_3'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 1210 | 0 | 50 | 0 | 1200 | 10 | 1210 (1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'rankpart_1_prt_extra'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'rankpart_1_prt_extra'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 300 | 0 | 10 | 0 | 300 | 0 | 300 @@ -498,7 +498,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 51 | 0 | 0 | 0 | 51 | 0 | 51 @@ -514,14 +514,14 @@ select pg_sleep(0.77) from gp_dist_random('gp_id'); -- Force pgstat_report_stat( (3 rows) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 153 | 0 | 0 | 0 | 102 | 0 | 153 (1 row) --- pg_stat_all_tables collects gpstats across segments -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; +-- gp_stat_all_tables_summary collects gpstats across segments +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from gp_stat_all_tables_summary where relid = 'mt'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 153 | 0 | 0 | 0 | 102 | 0 | 153 @@ -539,7 +539,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 51 | 0 | 0 | 0 | 51 | 0 | 51 @@ -555,14 +555,14 @@ select pg_sleep(0.77) from gp_dist_random('gp_id'); -- Force pgstat_report_stat( (3 rows) -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables_internal where relid = 'mt'::regclass; +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 153 | 0 | 0 | 0 | 102 | 0 | 153 (1 row) --- pg_stat_all_tables collects gpstats across segments -select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pg_stat_all_tables where relid = 'mt'::regclass; +-- gp_stat_all_tables_summary collects gpstats across segments +select n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from gp_stat_all_tables_summary where relid = 'mt'::regclass; n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze -----------+-----------+-----------+---------------+------------+------------+--------------------- 153 | 0 | 0 | 0 | 102 | 0 | 153 @@ -580,7 +580,7 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. (1 row) -select count(*) from pg_stat_all_tables +select count(*) from gp_stat_all_tables_summary where relid = (select segrelid from pg_appendonly where relid = 'tabstat_ao'::regclass) OR relid = (select blkdirrelid from pg_appendonly where relid = 'tabstat_ao'::regclass) @@ -596,19 +596,19 @@ select pg_sleep(0.77); -- Force pgstat_report_stat() to send tabstat. 
(1 row) -select n_tup_ins from pg_stat_all_tables where relid = (select segrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); +select n_tup_ins from gp_stat_all_tables_summary where relid = (select segrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); n_tup_ins ----------- 1 (1 row) -select n_tup_ins from pg_stat_all_tables where relid = (select blkdirrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); +select n_tup_ins from gp_stat_all_tables_summary where relid = (select blkdirrelid from pg_appendonly where relid = 'tabstat_ao'::regclass); n_tup_ins ----------- 1 (1 row) -select n_tup_ins from pg_stat_all_tables where relid = (select visimaprelid from pg_appendonly where relid = 'tabstat_ao'::regclass); +select n_tup_ins from gp_stat_all_tables_summary where relid = (select visimaprelid from pg_appendonly where relid = 'tabstat_ao'::regclass); n_tup_ins ----------- 1 diff --git a/contrib/pax_storage/src/test/regress/sql/pg_stat.sql b/contrib/pax_storage/src/test/regress/sql/pg_stat.sql index d9fc37850b0..383a9149186 100644 --- a/contrib/pax_storage/src/test/regress/sql/pg_stat.sql +++ b/contrib/pax_storage/src/test/regress/sql/pg_stat.sql @@ -6,17 +6,17 @@ create table pg_stat_test(a int); select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup -from pg_stat_all_tables where relname = 'pg_stat_test'; +from gp_stat_all_tables_summary where relname = 'pg_stat_test'; select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup -from pg_stat_user_tables where relname = 'pg_stat_test'; +from gp_stat_user_tables_summary where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_all_indexes where relname = 'pg_stat_test'; +from gp_stat_all_indexes_summary where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_user_indexes where relname = 'pg_stat_test'; +from gp_stat_user_indexes_summary where relname = 'pg_stat_test'; begin; -- make analyze same transcation with insert to avoid double the pgstat causes by unorder message read. 
insert into pg_stat_test select * from generate_series(1, 100); @@ -42,17 +42,17 @@ reset enable_seqscan; select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze -from pg_stat_all_tables where relname = 'pg_stat_test'; +from gp_stat_all_tables_summary where relname = 'pg_stat_test'; select schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze -from pg_stat_user_tables where relname = 'pg_stat_test'; +from gp_stat_user_tables_summary where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_all_indexes where relname = 'pg_stat_test'; +from gp_stat_all_indexes_summary where relname = 'pg_stat_test'; select schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch -from pg_stat_user_indexes where relname = 'pg_stat_test'; +from gp_stat_user_indexes_summary where relname = 'pg_stat_test'; reset optimizer; reset max_parallel_workers_per_gather; diff --git a/contrib/pax_storage/src/test/regress/sql/stats.sql b/contrib/pax_storage/src/test/regress/sql/stats.sql index 36878562f87..94944161be7 100644 --- a/contrib/pax_storage/src/test/regress/sql/stats.sql +++ b/contrib/pax_storage/src/test/regress/sql/stats.sql @@ -20,7 +20,7 @@ SELECT t.seq_scan, t.seq_tup_read, t.idx_scan, t.idx_tup_fetch, (b.heap_blks_read + b.heap_blks_hit) AS heap_blks, (b.idx_blks_read + b.idx_blks_hit) AS idx_blks, pg_stat_get_snapshot_timestamp() as snap_ts - FROM pg_catalog.pg_stat_user_tables AS t, + FROM pg_catalog.gp_stat_user_tables_summary AS t, pg_catalog.pg_statio_user_tables AS b WHERE t.relname='tenk2' AND b.relname='tenk2'; @@ -44,17 +44,17 @@ begin -- check to see if seqscan has been sensed SELECT (st.seq_scan >= pr.seq_scan + 1) INTO updated1 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; -- check to see if indexscan has been sensed SELECT (st.idx_scan >= pr.idx_scan + 1) INTO updated2 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; -- check to see if all updates have been sensed SELECT (n_tup_ins > 0) INTO updated3 - FROM pg_stat_user_tables WHERE relname='trunc_stats_test4'; + FROM gp_stat_user_tables_summary WHERE relname='trunc_stats_test4'; -- We must also check explicitly that pg_stat_get_snapshot_timestamp has -- advanced, because that comes from the global stats file which might @@ -64,7 +64,7 @@ begin -- check to see if idx_tup_fetch has been sensed SELECT (st.idx_tup_fetch >= pr.idx_tup_fetch + 1) INTO updated5 - FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; exit when updated1 and updated2 and updated3 and updated4 and updated5; @@ -167,14 +167,14 @@ SELECT wait_for_stats(); -- check effects SELECT relname, n_tup_ins, n_tup_upd, n_tup_del, n_live_tup, n_dead_tup - FROM pg_stat_user_tables + FROM gp_stat_user_tables_summary WHERE relname like 'trunc_stats_test%' order by relname; SELECT st.seq_scan >= pr.seq_scan + 1, st.seq_tup_read >= pr.seq_tup_read + cl.reltuples, st.idx_scan >= 
pr.idx_scan + 1, st.idx_tup_fetch >= pr.idx_tup_fetch + 1
-  FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr
+  FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr
   WHERE st.relname='tenk2' AND cl.relname='tenk2';
 
 -- GPDB_13_MERGE_FIXME: Some statistics are handled by stat collector process on each segment but not sent to master.
diff --git a/devops/README.md b/devops/README.md
new file mode 100644
index 00000000000..14c3468542c
--- /dev/null
+++ b/devops/README.md
@@ -0,0 +1,88 @@
+
+
+# Auto-Build Cloudberry Database from Source Code
+
+You can build Cloudberry Database from source code in two ways: manually or automatically.
+
+For a manual build you have to set up many system configurations and download third-party dependencies by hand, which is cumbersome and error-prone.
+
+To make the job easier, we recommend the automated deployment method and the scripts provided here. Automation simplifies the deployment process, reduces time costs, and lets developers focus on the code itself.
+
+## 1. Set up the Docker environment
+
+Nothing special, just follow the [official documentation](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository).
+
+## 2. Create the Docker build image
+
+Go to the directory for a supported OS, for example Rocky Linux 8:
+
+`cd devops/deploy/docker/build/rocky8/`
+
+and build the image:
+
+`docker build -t cloudberry-db-env .`
+
+The whole process usually takes about 5 minutes. You can reuse the resulting base image as many times as you want; just launch a new container for each task.
+
+## 3. Launch a container
+
+Just run:
+
+`docker run -h cdw -it cloudberry-db-env`
+
+The `docker exec` commands below need the container ID (or name); look it up with `docker ps`. It appears as `<container-id>` in the examples.
+
+## 4. Check out the git repo inside the container
+
+The same way you would do it on your laptop:
+
+`docker exec <container-id> bash -c "cd /home/gpadmin && git clone --recurse-submodules https://github.com/apache/cloudberry.git"`
+
+## 5. Set the environment and configure the build container
+
+Create a directory to store logs:
+
+`SRC_DIR=/home/gpadmin/cloudberry && docker exec <container-id> bash -c "mkdir ${SRC_DIR}/build-logs"`
+
+Run configure and check whether the system is ready for the build:
+
+`SRC_DIR=/home/gpadmin/cloudberry && docker exec <container-id> bash -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ./devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"`
+
+## 6. Build the binaries
+
+The build consumes all available CPU resources and can take several minutes to complete:
+
+`SRC_DIR=/home/gpadmin/cloudberry && docker exec <container-id> bash -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ./devops/build/automation/cloudberry/scripts/build-cloudberry.sh"`
+
+## 7. Install the binaries and create a demo cluster
+
+By default `make install` copies the compiled binaries to `/usr/local/cloudberry-db`:
+
+`SRC_DIR=/home/gpadmin/cloudberry && docker exec <container-id> bash -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} make install"`
+
+To create a demo cluster, launch `create-cloudberry-demo-cluster.sh`:
+
+`SRC_DIR=/home/gpadmin/cloudberry && docker exec <container-id> bash -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ./devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"`
+
+## 8. Execute a test query
+
+Now you can set up the environment and execute queries:
+
+`docker exec <container-id> bash -c "source /usr/local/cloudberry-db/cloudberry-env.sh && source /home/gpadmin/cloudberry/gpAux/gpdemo/gpdemo-env.sh && psql -U gpadmin -d postgres -c 'SELECT 42'"`
+
+All done!
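
The eight steps above can also be driven from the host in one go. A minimal sketch, assuming the image name `cloudberry-db-env` from step 2, the paths used above, and a container already started as in step 3 (the `in_container` helper is illustrative, not part of the repo):

```bash
#!/bin/bash
set -euo pipefail

SRC_DIR=/home/gpadmin/cloudberry

# Pick the first running container created from the build image (step 3).
CONTAINER_ID=$(docker ps --filter "ancestor=cloudberry-db-env" --format '{{.ID}}' | head -n1)

# Hypothetical helper: run a command inside the build container.
in_container() {
  docker exec "${CONTAINER_ID}" bash -c "$*"
}

in_container "cd /home/gpadmin && git clone --recurse-submodules https://github.com/apache/cloudberry.git"   # step 4
in_container "mkdir -p ${SRC_DIR}/build-logs"                                                                 # step 5
in_container "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ./devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"
in_container "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ./devops/build/automation/cloudberry/scripts/build-cloudberry.sh"      # step 6
in_container "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} make install"                                               # step 7
in_container "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ./devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"
```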
diff --git a/devops/build/automation/cloudberry/scripts/analyze_core_dumps.sh b/devops/build/automation/cloudberry/scripts/analyze_core_dumps.sh
new file mode 100755
index 00000000000..4749ec76271
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/analyze_core_dumps.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: analyze_core_dumps.sh
+# Description: Automated analysis tool for core dump files using GDB
+#
+# This script automatically analyzes core dump files found in a
+# specified directory, providing stack traces and register
+# information. It's particularly useful for analyzing crashes in
+# Postgres/Cloudberry executables and Python applications.
+#
+# Features:
+# - Automatic detection of core dump files
+# - Support for both compiled executables and interpreted languages
+# - Detailed stack traces with GDB
+# - Register state analysis
+# - Assembly code context at crash point
+# - Comprehensive logging of analysis results
+#
+# Usage: analyze_core_dumps.sh [test_id]
+#   test_id: Optional identifier for the test configuration that generated cores
+#
+# Dependencies:
+# - GDB (GNU Debugger)
+# - file command
+#
+# Environment Variables:
+#   SRC_DIR - Base directory for operations (defaults to /tmp)
+#
+# Return Codes:
+#   0 - No core files were found
+#   1 - Core files were found and all were processed successfully
+#   2 - Error conditions:
+#       - Missing required dependencies (gdb, file)
+#       - Issues processing some or all core files
+# --------------------------------------------------------------------
+
+set -u
+
+# Configuration
+#-----------------------------------------------------------------------------
+# Use SRC_DIR if defined, otherwise default to /tmp
+SRC_DIR="${SRC_DIR:-/tmp}"
+# Define log directory and files
+LOG_DIR="${SRC_DIR}/build-logs"
+# Create log directories if they don't exist
+mkdir -p "${LOG_DIR}"
+
+# Determine log file name based on test_id argument
+if [ $# -ge 1 ]; then
+    test_id="$1"
+    log_file="${LOG_DIR}/core_analysis_${test_id}_$(date +%Y%m%d_%H%M%S).log"
+else
+    log_file="${LOG_DIR}/core_analysis_$(date +%Y%m%d_%H%M%S).log"
+fi
+echo "log_file: ${log_file}"
+
+# Directory where core dumps are located
+core_dir="/tmp/cloudberry-cores/"
+
+# Pattern to match core dump files
+core_pattern="core-*"
+
+# Function Definitions
+#-----------------------------------------------------------------------------
+# Log messages to both console and log file
+# Args:
+#   $1 - Message to log
+log_message() {
+    local message="[$(date '+%Y-%m-%d %H:%M:%S')] $1"
+    echo "$message"
+    echo "$message" >> "$log_file"
+}
+
+# Analyze a single core file
+# Args:
+#   $1 - Path to core file
+# Returns:
+#   0 on success, 1 on failure
+analyze_core_file() {
+    local core_file="$1"
+    local file_info
+
+    log_message "Analyzing core file: $core_file"
+    file_info=$(file "$core_file")
+    log_message "Core file info: $file_info"
+
+    # Extract the original command from the core file info
+    # (the pattern must be unquoted, or bash matches it as a literal string)
+    if [[ "$file_info" =~ from\ \'([^\']+)\' ]]; then
+        local original_cmd="${BASH_REMATCH[1]}"
+        log_message "Original command: $original_cmd"
+    fi
+
+    # Extract executable path from core file info
+    if [[ "$file_info" =~ execfn:\ \'([^\']+)\' ]]; then
+        local executable="${BASH_REMATCH[1]}"
+        log_message "Executable path: $executable"
+
+        # Convert relative path to absolute if needed
+        if [[ "$executable" == "./"* ]]; then
+            executable="$PWD/${executable:2}"
+            log_message "Converted to absolute path: $executable"
+        fi
+
+        # Run GDB analysis, sending both stdout and stderr to the log
+        log_message "Starting GDB analysis..."
+
+        gdb -quiet \
+            --batch \
+            -ex 'set pagination off' \
+            -ex 'info target' \
+            -ex 'thread apply all bt' \
+            -ex 'print $_siginfo' \
+            -ex quit \
+            "$executable" "$core_file" >> "$log_file" 2>&1
+
+        local gdb_rc=$?
+        if [ $gdb_rc -eq 0 ] && [ -s "$log_file" ]; then
+            log_message "GDB analysis completed successfully"
+            return 0
+        else
+            log_message "Warning: GDB analysis failed or produced no output"
+            return 1
+        fi
+    else
+        log_message "Could not find executable path in core file"
+        return 1
+    fi
+}
+
+# Function to check required commands
+check_dependencies() {
+    local missing=0
+    local required_commands=("gdb" "file")
+
+    log_message "Checking required commands..."
+    for cmd in "${required_commands[@]}"; do
+        if ! command -v "$cmd" >/dev/null 2>&1; then
+            log_message "Error: Required command '$cmd' not found"
+            missing=1
+        fi
+    done
+
+    if [ $missing -eq 1 ]; then
+        log_message "Missing required dependencies. Please install them and try again."
+        return 1
+    fi
+
+    log_message "All required commands found"
+    return 0
+}
+
+# Main Execution
+#-----------------------------------------------------------------------------
+main() {
+    local core_count=0
+    local analyzed_count=0
+    local return_code=0
+
+    log_message "Starting core dump analysis"
+    log_message "Using source directory: $SRC_DIR"
+    log_message "Using log directory: $LOG_DIR"
+
+    # Check dependencies first
+    if ! check_dependencies; then
+        return 2
+    fi
+
+    # Process all core files
+    for core_file in "$core_dir"/$core_pattern; do
+        if [[ -f "$core_file" ]]; then
+            ((core_count++))
+            if analyze_core_file "$core_file"; then
+                ((analyzed_count++))
+            fi
+        fi
+    done
+
+    # Determine return code based on results
+    if ((core_count == 0)); then
+        log_message "No core files found matching pattern $core_pattern in $core_dir"
+        return_code=0 # No cores found
+    elif ((analyzed_count == core_count)); then
+        log_message "Analysis complete. Successfully processed $analyzed_count core file(s)"
+        return_code=1 # All cores processed successfully
+    else
+        log_message "Analysis complete with errors. Processed $analyzed_count of $core_count core files"
+        return_code=2 # Some cores failed to process
+    fi
+
+    log_message "Log file: $log_file"
+
+    return $return_code
+}
+
+# Script entry point
+main
+return_code=$?
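+
+# The analysis log is kept only when it is likely to be useful: main
+# returns 0 when no core files were found, and in that case the log
+# written above is removed again below.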
+if ((return_code == 0)); then
+    rm -fv "${log_file}"
+fi
+
+exit $return_code
diff --git a/devops/build/automation/cloudberry/scripts/build-cloudberry.sh b/devops/build/automation/cloudberry/scripts/build-cloudberry.sh
new file mode 100755
index 00000000000..db04f1b7592
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/build-cloudberry.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: build-cloudberry.sh
+# Description: Builds Apache Cloudberry from source code and installs
+#              it. Performs the following steps:
+#              1. Builds main Apache Cloudberry database components
+#              2. Builds contrib modules
+#              3. Installs both main and contrib components
+#              Uses parallel compilation based on available CPU cores.
+#
+# Required Environment Variables:
+#   SRC_DIR - Root source directory containing Apache Cloudberry
+#             source code
+#
+# Optional Environment Variables:
+#   LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs)
+#   NPROC   - Number of parallel jobs (defaults to all available cores)
+#
+# Usage:
+#   Export required variables:
+#     export SRC_DIR=/path/to/cloudberry/source
+#   Then run:
+#     ./build-cloudberry.sh
+#
+# Prerequisites:
+#   - configure-cloudberry.sh must be run first
+#   - Required build dependencies must be installed
+#   - /usr/local/cloudberry-db/lib must exist and be writable
+#
+# Exit Codes:
+#   0 - Build and installation completed successfully
+#   1 - Environment setup failed (missing SRC_DIR, LOG_DIR creation failed)
+#   2 - Main component build failed
+#   3 - Contrib build failed
+#   4 - Installation failed
+#
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Source common utilities
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "${SCRIPT_DIR}/cloudberry-utils.sh"
+
+# Define log directory and files
+export LOG_DIR="${SRC_DIR}/build-logs"
+BUILD_LOG="${LOG_DIR}/build.log"
+
+# Initialize environment
+init_environment "Cloudberry Build Script" "${BUILD_LOG}"
+
+# Set environment
+log_section "Environment Setup"
+export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:${LD_LIBRARY_PATH:-}
+log_section_end "Environment Setup"
+
+# Build process (honor the documented NPROC override, default to all cores)
+log_section "Build Process"
+execute_cmd make -j"${NPROC:-$(nproc)}" --directory ${SRC_DIR} || exit 2
+execute_cmd make -j"${NPROC:-$(nproc)}" --directory ${SRC_DIR}/contrib || exit 3
+log_section_end "Build Process"
+
+# Installation
+log_section "Installation"
+execute_cmd make install --directory ${SRC_DIR} || exit 4
+execute_cmd make install --directory ${SRC_DIR}/contrib || exit 4
+log_section_end "Installation"
+
+# Log completion
+log_completion "Cloudberry Build Script" "${BUILD_LOG}"
+exit 0
diff --git a/devops/build/automation/cloudberry/scripts/cloudberry-utils.sh b/devops/build/automation/cloudberry/scripts/cloudberry-utils.sh
new file mode 100755
index 00000000000..01b0b1b8381
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/cloudberry-utils.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Library: cloudberry-utils.sh
+# Description: Common utility functions for Apache Cloudberry build
+#              and test scripts
+#
+# Required Environment Variables:
+#   SRC_DIR - Root source directory
+#
+# Optional Environment Variables:
+#   LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs)
+#
+# Functions:
+#   init_environment "Script Name" "Log File"
+#     - Initialize logging and verify environment
+#     - Parameters:
+#       * script_name: Name of the calling script
+#       * log_file: Path to log file
+#     - Returns: 0 on success, 1 on failure
+#
+#   execute_cmd command [args...]
+#     - Execute command with logging
+#     - Parameters: Command and its arguments
+#     - Returns: Command's exit code
+#
+#   run_psql_cmd "sql_command"
+#     - Execute PostgreSQL command with logging
+#     - Parameters: SQL command string
+#     - Returns: psql command's exit code
+#
+#   source_cloudberry_env
+#     - Source Cloudberry environment files
+#     - Returns: 0 on success
+#
+#   log_section "section_name"
+#     - Log section start
+#     - Parameters: Name of the section
+#
+#   log_section_end "section_name"
+#     - Log section end
+#     - Parameters: Name of the section
+#
+#   log_completion "script_name" "log_file"
+#     - Log script completion
+#     - Parameters:
+#       * script_name: Name of the calling script
+#       * log_file: Path to log file
+#
+# Usage:
+#   source ./cloudberry-utils.sh
+#
+# Example:
+#   source ./cloudberry-utils.sh
+#   init_environment "My Script" "${LOG_FILE}"
+#   execute_cmd make clean
+#   log_section "Build Process"
+#   execute_cmd make -j$(nproc)
+#   log_section_end "Build Process"
+#   log_completion "My Script" "${LOG_FILE}"
+#
+# --------------------------------------------------------------------
+
+# Initialize logging and environment
+init_environment() {
+    local script_name=$1
+    local log_file=$2
+
+    # Verify the environment before logging anything that depends on it
+    if [ -z "${SRC_DIR:-}" ]; then
+        echo "Error: SRC_DIR environment variable is not set"
+        exit 1
+    fi
+
+    mkdir -p "${LOG_DIR}"
+
+    echo "=== Initializing environment for ${script_name} ==="
+    echo "${script_name} executed at $(date)" | tee -a "${log_file}"
+    echo "Whoami: $(whoami)" | tee -a "${log_file}"
+    echo "Hostname: $(hostname)" | tee -a "${log_file}"
+    echo "Working directory: $(pwd)" | tee -a "${log_file}"
+    echo "Source directory: ${SRC_DIR}" | tee -a "${log_file}"
+    echo "Log directory: ${LOG_DIR}" | tee -a "${log_file}"
+}
+
+# Function to echo and execute command with logging
+execute_cmd() {
+    local cmd_str="$*"
+    local timestamp=$(date "+%Y.%m.%d-%H.%M.%S")
+    echo "Executing at ${timestamp}: $cmd_str" | tee -a "${LOG_DIR}/commands.log"
+    "$@" 2>&1 | tee -a "${LOG_DIR}/commands.log"
+    return ${PIPESTATUS[0]}
+}
+
+# Function to run psql commands with logging
+run_psql_cmd() {
+    local cmd=$1
+    local timestamp=$(date "+%Y.%m.%d-%H.%M.%S")
+    echo "Executing psql at ${timestamp}: $cmd" | tee -a "${LOG_DIR}/psql-commands.log"
+    psql -P pager=off template1 -c "$cmd" 2>&1 | tee -a "${LOG_DIR}/psql-commands.log"
+    return ${PIPESTATUS[0]}
+}
+
+# Function to source Cloudberry environment
+source_cloudberry_env() {
+    echo "=== Sourcing Cloudberry environment ===" | tee -a "${LOG_DIR}/environment.log"
+    source /usr/local/cloudberry-db/cloudberry-env.sh
+    source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh
+}
+
+# Function to log section start
+log_section() {
+    local section_name=$1
+    local timestamp=$(date "+%Y.%m.%d-%H.%M.%S")
+    echo "=== ${section_name} started at ${timestamp} ===" | tee -a "${LOG_DIR}/sections.log"
+}
+
+# Function to log section end
+log_section_end() {
+    local section_name=$1
+    local timestamp=$(date "+%Y.%m.%d-%H.%M.%S")
+    echo "=== ${section_name} completed at ${timestamp} ===" | tee -a "${LOG_DIR}/sections.log"
+}
+
+# Function to log script completion
+log_completion() {
+    local script_name=$1
+    local log_file=$2
+    local timestamp=$(date "+%Y.%m.%d-%H.%M.%S")
+    echo "${script_name} execution completed successfully at ${timestamp}" | tee -a "${log_file}"
+}
diff --git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh
new file mode 100755
index 00000000000..4d84c3bfc21
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh
@@ -0,0 +1,165 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: configure-cloudberry.sh
+# Description: Configures Apache Cloudberry build environment and runs
+#              ./configure with optimized settings. Performs the
+#              following:
+#              1. Prepares /usr/local/cloudberry-db directory
+#              2. Sets up library dependencies
+#              3. Configures build with required features enabled
+#
+# Configuration Features:
+#   - Cloud Storage Integration (gpcloud)
+#   - IC Proxy Support
+#   - MapReduce Processing
+#   - Oracle Compatibility (orafce)
+#   - ORCA Query Optimizer
+#   - PAX Access Method
+#   - PXF External Table Access
+#   - Test Automation Support (tap-tests)
+#
+# System Integration:
+#   - GSSAPI Authentication
+#   - LDAP Authentication
+#   - XML Processing
+#   - LZ4 Compression
+#   - OpenSSL Support
+#   - PAM Authentication
+#   - Perl Support
+#   - Python Support
+#
+# Required Environment Variables:
+#   SRC_DIR - Root source directory
+#
+# Optional Environment Variables:
+#   LOG_DIR      - Directory for logs (defaults to ${SRC_DIR}/build-logs)
+#   ENABLE_DEBUG - Enable debug build options (true/false, defaults to
+#                  false)
+#
+#                  When true, enables:
+#                    --enable-debug
+#                    --enable-profiling
+#                    --enable-cassert
+#                    --enable-debug-extensions
+#
+# Prerequisites:
+#   - System dependencies must be installed:
+#     * xerces-c development files
+#     * OpenSSL development files
+#     * Python development files
+#     * Perl development files
+#     * LDAP development files
+#   - /usr/local must be writable
+#   - User must have sudo privileges
+#
+# Usage:
+#   Export required variables:
+#     export SRC_DIR=/path/to/cloudberry/source
+#   Then run:
+#     ./configure-cloudberry.sh
+#
+# Exit Codes:
+#   0 - Configuration completed successfully
+#   1 - Environment setup failed
+#   2 - Directory preparation failed
+#   3 - Library setup failed
+#   4 - Configure command failed
+#
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Source common utilities
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "${SCRIPT_DIR}/cloudberry-utils.sh"
+
+# Define log directory and files
+export LOG_DIR="${SRC_DIR}/build-logs"
+CONFIGURE_LOG="${LOG_DIR}/configure.log"
+
+# Initialize environment
+init_environment "Cloudberry Configure Script" "${CONFIGURE_LOG}"
+
+# Initial setup
+log_section "Initial Setup"
+execute_cmd sudo rm -rf /usr/local/cloudberry-db || exit 2
+execute_cmd sudo chmod a+w /usr/local || exit 2
+execute_cmd mkdir -p /usr/local/cloudberry-db/lib || exit 2
+execute_cmd sudo cp /usr/local/xerces-c/lib/libxerces-c.so \
+                    /usr/local/xerces-c/lib/libxerces-c-3.3.so \
+                    /usr/local/cloudberry-db/lib || exit 3
+execute_cmd sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db || exit 2
+log_section_end "Initial Setup"
+
+# Set environment
+log_section "Environment Setup"
+export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:${LD_LIBRARY_PATH:-}
+log_section_end "Environment Setup"
+
+# Add debug options if ENABLE_DEBUG is set to "true"
+CONFIGURE_DEBUG_OPTS=""
+
+if [ "${ENABLE_DEBUG:-false}" = "true" ]; then
+    CONFIGURE_DEBUG_OPTS="--enable-debug \
+                          --enable-profiling \
+                          --enable-cassert \
+                          --enable-debug-extensions"
+fi
+
+# Configure build
+log_section "Configure"
+execute_cmd ./configure --prefix=/usr/local/cloudberry-db \
+            --disable-external-fts \
+            --enable-gpcloud \
+            --enable-ic-proxy \
+            --enable-mapreduce \
+            --enable-orafce \
+            --enable-orca \
+            --enable-pax \
+            --enable-pxf \
+            --enable-tap-tests \
+            ${CONFIGURE_DEBUG_OPTS} \
+            --with-gssapi \
+            --with-ldap \
+            --with-libxml \
+            --with-lz4 \
+            --with-openssl \
+            --with-pam \
+            --with-perl \
+            --with-pgport=5432 \
+            --with-python \
+            --with-pythonsrc-ext \
+            --with-ssl=openssl \
+            --with-uuid=e2fs \
+            --with-includes=/usr/local/xerces-c/include \
+            --with-libraries=/usr/local/cloudberry-db/lib || exit 4
+log_section_end "Configure"
+
+# Capture version information
+log_section "Version Information"
+execute_cmd ag "GP_VERSION | GP_VERSION_NUM | PG_VERSION | PG_VERSION_NUM | PG_VERSION_STR" src/include/pg_config.h
+log_section_end "Version Information"
+
+# Log completion
+log_completion "Cloudberry Configure Script" "${CONFIGURE_LOG}"
+exit 0
diff --git a/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh b/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh
new file mode 100755
index 00000000000..e01d62fac9c
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: create-cloudberry-demo-cluster.sh
+# Description: Creates and configures a demo Apache Cloudberry cluster.
+#              Performs the following steps:
+#              1. Sets up required environment variables
+#              2. Verifies SSH connectivity
+#              3. Creates demo cluster using make
+#              4. Initializes and starts the cluster
+#              5. Performs comprehensive verification checks
+#
+# Required Environment Variables:
+#   SRC_DIR - Root source directory
+#
+# Optional Environment Variables:
+#   LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs)
+#
+# Prerequisites:
+#   - Apache Cloudberry must be installed (/usr/local/cloudberry-db)
+#   - SSH must be configured for passwordless access to localhost
+#   - User must have permissions to create cluster directories
+#   - PostgreSQL client tools (psql) must be available
+#
+# Usage:
+#   Export required variables:
+#     export SRC_DIR=/path/to/cloudberry/source
+#   Then run:
+#     ./create-cloudberry-demo-cluster.sh
+#
+# Verification Checks:
+#   - Apache Cloudberry version
+#   - Segment configuration
+#   - Available extensions
+#   - Active sessions
+#   - Configuration history
+#   - Replication status
+#
+# Exit Codes:
+#   0 - Cluster created and verified successfully
+#   1 - Environment setup failed
+#   2 - SSH verification failed
+#   3 - Cluster creation failed
+#   4 - Cluster startup failed
+#   5 - Verification checks failed
+#
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Source common utilities
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "${SCRIPT_DIR}/cloudberry-utils.sh"
+
+# Define log directory
+export LOG_DIR="${SRC_DIR}/build-logs"
+CLUSTER_LOG="${LOG_DIR}/cluster.log"
+
+# Initialize environment
+init_environment "Cloudberry Demo Cluster Script" "${CLUSTER_LOG}"
+
+# Setup environment
+log_section "Environment Setup"
+source /usr/local/cloudberry-db/cloudberry-env.sh || exit 1
+log_section_end "Environment Setup"
+
+# Verify SSH access
+log_section "SSH Verification"
+execute_cmd ssh $(hostname) 'whoami; hostname' || exit 2
+log_section_end "SSH Verification"
+
+# Create demo cluster
+log_section "Demo Cluster Creation"
+execute_cmd make create-demo-cluster --directory ${SRC_DIR} || exit 3
+log_section_end "Demo Cluster Creation"
+
+# Source demo environment
+log_section "Source Environment"
+source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh || exit 1
+log_section_end "Source Environment"
+
+# Manage cluster state
+log_section "Cluster Management"
+execute_cmd gpstop -a || exit 4
+execute_cmd gpstart -a || exit 4
+execute_cmd gpstate || exit 4
+log_section_end "Cluster Management"
+
+# Verify installation
+log_section "Installation Verification"
+verification_failed=false
+run_psql_cmd "SELECT version()" || verification_failed=true
+run_psql_cmd "SELECT * from gp_segment_configuration" || verification_failed=true
+run_psql_cmd "SELECT * FROM pg_available_extensions" || verification_failed=true
+run_psql_cmd "SELECT * from pg_stat_activity" || verification_failed=true
+run_psql_cmd "SELECT * FROM gp_configuration_history" || verification_failed=true
+run_psql_cmd "SELECT * FROM gp_stat_replication" || verification_failed=true
+
+if [ "$verification_failed" = true ]; then
+    echo "One or more verification checks failed" | tee -a "${CLUSTER_LOG}"
+    exit 5
+fi
+log_section_end "Installation Verification"
+
+# Log completion
+log_completion "Cloudberry Demo Cluster Script" "${CLUSTER_LOG}"
+exit 0
diff --git a/devops/build/automation/cloudberry/scripts/destroy-cloudberry-demo-cluster.sh b/devops/build/automation/cloudberry/scripts/destroy-cloudberry-demo-cluster.sh
new file mode 100755
index 00000000000..3d4ce241979
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/destroy-cloudberry-demo-cluster.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: destroy-cloudberry-demo-cluster.sh
+# Description: Destroys and cleans up a demo Apache Cloudberry
+#              cluster. Performs the following steps:
+#              1. Sources required environment variables
+#              2. Stops any running cluster processes
+#              3. Removes cluster data directories and configuration
+#              4. Cleans up any remaining cluster resources
+#
+# Required Environment Variables:
+#   SRC_DIR - Root source directory
+#
+# Optional Environment Variables:
+#   LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs)
+#
+# Prerequisites:
+#   - Apache Cloudberry environment must be available
+#   - User must have permissions to remove cluster directories
+#   - No active connections to the cluster
+#
+# Usage:
+#   Export required variables:
+#     export SRC_DIR=/path/to/cloudberry/source
+#   Then run:
+#     ./destroy-cloudberry-demo-cluster.sh
+#
+# Exit Codes:
+#   0 - Cluster destroyed successfully
+#   1 - Environment setup/sourcing failed
+#   2 - Cluster destruction failed
+#
+# Related Scripts:
+#   - create-cloudberry-demo-cluster.sh: Creates a new demo cluster
+#
+# Notes:
+#   - This script will forcefully terminate all cluster processes
+#   - All cluster data will be permanently deleted
+#   - Make sure to back up any important data before running
+#
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Source common utilities
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "${SCRIPT_DIR}/cloudberry-utils.sh"
+
+# Define log directory
+export LOG_DIR="${SRC_DIR}/build-logs"
+CLUSTER_LOG="${LOG_DIR}/destroy-cluster.log"
+
+# Initialize environment
+init_environment "Destroy Cloudberry Demo Cluster Script" "${CLUSTER_LOG}"
+
+# Source Cloudberry environment
+log_section "Environment Setup"
+source_cloudberry_env || {
+    echo "Failed to source Cloudberry environment" | tee -a "${CLUSTER_LOG}"
+    exit 1
+}
+log_section_end "Environment Setup"
+
+# Destroy demo cluster
+log_section "Destroy Demo Cluster"
+execute_cmd make destroy-demo-cluster --directory ${SRC_DIR} || {
+    echo "Failed to destroy demo cluster" | tee -a "${CLUSTER_LOG}"
+    exit 2
+}
+log_section_end "Destroy Demo Cluster"
+
+# Verify cleanup
+log_section "Cleanup Verification"
+if [ -d "${SRC_DIR}/gpAux/gpdemo/data" ]; then
+    echo "Warning: Data directory still exists after cleanup" | tee -a "${CLUSTER_LOG}"
+fi
+log_section_end "Cleanup Verification"
+
+# Log completion
+log_completion "Destroy Cloudberry Demo Cluster Script" "${CLUSTER_LOG}"
+exit 0
diff --git a/devops/build/automation/cloudberry/scripts/parse-results.pl b/devops/build/automation/cloudberry/scripts/parse-results.pl
new file mode 100755
index 00000000000..d09085d5fb9
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/parse-results.pl
@@ -0,0 +1,215 @@
+#!/usr/bin/env perl
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: parse-results.pl
+# Description: Processes Cloudberry test output to extract statistics
+#              and results. Analyzes test log files to determine:
+#              1. Overall test status (pass/fail)
+#              2. Total number of tests run
+#              3. Number of passed, failed, and ignored tests
+#              4. Names of failed and ignored tests
+#              5. Validates test counts for consistency
+#              Results are written to a file for shell script processing.
+#
+# Arguments:
+#   log-file   Path to test log file (required)
+#
+# Input File Format:
+#   Expects test log files containing one of the following summary formats:
+#   - "All X tests passed."
+#   - "Y of X tests failed."
+#   - "X of Y tests passed, Z failed test(s) ignored."
+#   - "X of Y tests failed, Z of these failures ignored."
+#
+#   And failed or ignored test entries in format:
+#   - "test_name ... FAILED"
+#   - "test_name ... failed (ignored)"
+#
+# Output File (test_results.txt):
+#   Environment variable format:
+#     STATUS=passed|failed
+#     TOTAL_TESTS=
+#     FAILED_TESTS=
+#     PASSED_TESTS=
+#     IGNORED_TESTS=
+#     FAILED_TEST_NAMES=
+#     IGNORED_TEST_NAMES=
+#
+# Prerequisites:
+#   - Read access to input log file
+#   - Write access to current directory
+#   - Perl 5.x or higher
+#
+# Exit Codes:
+#   0 - All tests passed, or only ignored failures occurred
+#   1 - Some non-ignored tests failed
+#   2 - Parse error or cannot access files
+#
+# Example Usage:
+#   ./parse-results.pl test_output.log
+#
+# Error Handling:
+#   - Validates input file existence and readability
+#   - Verifies failed and ignored test counts match found entries
+#   - Reports parsing errors with detailed messages
+#
+# --------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+# Exit codes
+use constant {
+    SUCCESS      => 0,
+    TEST_FAILURE => 1,
+    PARSE_ERROR  => 2
+};
+
+# Get log file path from command line argument
+my $file = $ARGV[0] or die "Usage: $0 LOG_FILE\n";
+print "Parsing test results from: $file\n";
+
+# Check if file exists and is readable
+unless (-e $file) {
+    print "Error: File does not exist: $file\n";
+    exit PARSE_ERROR;
+}
+unless (-r $file) {
+    print "Error: File is not readable: $file\n";
+    exit PARSE_ERROR;
+}
+
+# Open and parse the log file
+open(my $fh, '<', $file) or do {
+    print "Cannot open log file: $! (looking in $file)\n";
+    exit PARSE_ERROR;
+};
+
+# Initialize variables
+my ($status, $total_tests, $failed_tests, $ignored_tests, $passed_tests) = ('', 0, 0, 0, 0);
+my @failed_test_list = ();
+my @ignored_test_list = ();
+
+while (<$fh>) {
+    # Match the summary lines
+    if (/All (\d+) tests passed\./) {
+        $status = 'passed';
+        $total_tests = $1;
+        $passed_tests = $1;
+    }
+    elsif (/(\d+) of (\d+) tests passed, (\d+) failed test\(s\) ignored\./) {
+        $status = 'passed';
+        $passed_tests = $1;
+        $total_tests = $2;
+        $ignored_tests = $3;
+    }
+    elsif (/(\d+) of (\d+) tests failed\./) {
+        $status = 'failed';
+        $failed_tests = $1;
+        $total_tests = $2;
+        $passed_tests = $2 - $1;
+    }
+    elsif (/(\d+) of (\d+) tests failed, (\d+) of these failures ignored\./) {
+        $status = 'failed';
+        $failed_tests = $1 - $3;
+        $ignored_tests = $3;
+        $total_tests = $2;
+        $passed_tests = $2 - $1;
+    }
+
+    # Capture failed tests
+    if (/^(?:\s+|test\s+)(\S+)\s+\.\.\.\s+FAILED\s+/) {
+        push @failed_test_list, $1;
+    }
+
+    # Capture ignored tests
+    if (/^(?:\s+|test\s+)(\S+)\s+\.\.\.\s+failed \(ignored\)/) {
+        push @ignored_test_list, $1;
+    }
+}
+
+# Close the log file
+close $fh;
+
+# Validate failed test count matches found test names
+if ($status eq 'failed' && scalar(@failed_test_list) != $failed_tests) {
+    print "Error: Found $failed_tests failed tests in summary but found " . scalar(@failed_test_list) . " failed test names\n";
+    print "Failed test names found:\n";
+    foreach my $test (@failed_test_list) {
+        print "  - $test\n";
+    }
+    exit PARSE_ERROR;
+}
+
+# Validate ignored test count matches found test names
+if ($ignored_tests != scalar(@ignored_test_list)) {
+    print "Error: Found $ignored_tests ignored tests in summary but found " . scalar(@ignored_test_list) . " ignored test names\n";
+    print "Ignored test names found:\n";
+    foreach my $test (@ignored_test_list) {
+        print "  - $test\n";
+    }
+    exit PARSE_ERROR;
+}
+
+# Write results to the results file
+open my $result_fh, '>', 'test_results.txt' or die "Cannot write to results file: $!\n";
+print $result_fh "STATUS=$status\n";
+print $result_fh "TOTAL_TESTS=$total_tests\n";
+print $result_fh "PASSED_TESTS=$passed_tests\n";
+print $result_fh "FAILED_TESTS=$failed_tests\n";
+print $result_fh "IGNORED_TESTS=$ignored_tests\n";
+if (@failed_test_list) {
+    print $result_fh "FAILED_TEST_NAMES=" . join(',', @failed_test_list) . "\n";
+}
+if (@ignored_test_list) {
+    print $result_fh "IGNORED_TEST_NAMES=" . join(',', @ignored_test_list) . "\n";
+}
"\n"; +} +close $result_fh; + +# Print to stdout for logging +print "Test Results:\n"; +print "Status: $status\n"; +print "Total Tests: $total_tests\n"; +print "Failed Tests: $failed_tests\n"; +print "Ignored Tests: $ignored_tests\n"; +print "Passed Tests: $passed_tests\n"; +if (@failed_test_list) { + print "Failed Test Names:\n"; + foreach my $test (@failed_test_list) { + print " - $test\n"; + } +} +if (@ignored_test_list) { + print "Ignored Test Names:\n"; + foreach my $test (@ignored_test_list) { + print " - $test\n"; + } +} + +# Exit with appropriate code +if ($status eq 'passed') { + exit SUCCESS; +} elsif ($status eq 'failed') { + exit TEST_FAILURE; +} else { + exit PARSE_ERROR; +} diff --git a/devops/build/automation/cloudberry/scripts/parse-test-results.sh b/devops/build/automation/cloudberry/scripts/parse-test-results.sh new file mode 100755 index 00000000000..ace00f63b3f --- /dev/null +++ b/devops/build/automation/cloudberry/scripts/parse-test-results.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# +# Script: parse-test-results.sh +# Description: Parses Apache Cloudberry test results and processes the +# output. +# Provides GitHub Actions integration and environment +# variable export functionality. This script is a wrapper +# around parse_results.pl, adding the following features: +# 1. Default log file path handling +# 2. GitHub Actions output integration +# 3. Environment variable management +# 4. 
+#
+# Arguments:
+#   [log-file] - Path to test log file
+#                (defaults to build-logs/details/make-${MAKE_NAME}.log)
+#
+# Prerequisites:
+#   - parse-results.pl must be in the same directory
+#   - Perl must be installed and in PATH
+#   - Write access to current directory (for temporary files)
+#   - Read access to test log file
+#
+# Output Variables (in GitHub Actions):
+#   status             - Test status (passed/failed)
+#   total_tests        - Total number of tests
+#   failed_tests       - Number of failed tests
+#   passed_tests       - Number of passed tests
+#   ignored_tests      - Number of ignored tests
+#   failed_test_names  - Names of failed tests (comma-separated)
+#   ignored_test_names - Names of ignored tests (comma-separated)
+#
+# Usage Examples:
+#   # Parse default log file:
+#   ./parse-test-results.sh
+#
+#   # Parse specific log file:
+#   ./parse-test-results.sh path/to/test.log
+#
+#   # Use with GitHub Actions:
+#   export GITHUB_OUTPUT=/path/to/output
+#   ./parse-test-results.sh
+#
+# Exit Codes:
+#   0 - All tests passed successfully
+#   1 - Tests failed but results were properly parsed
+#   2 - Parse error, missing files, or unknown status
+#
+# Files Created/Modified:
+#   - Temporary: test_results.txt (automatically cleaned up)
+#   - If GITHUB_OUTPUT set: Appends results to specified file
+#
+# --------------------------------------------------------------------
+
+set -uo pipefail
+
+# Default log file path
+DEFAULT_LOG_PATH="build-logs/details/make-${MAKE_NAME}.log"
+LOG_FILE=${1:-$DEFAULT_LOG_PATH}
+
+# Get the directory where this script is located
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Check if log file exists
+if [ ! -f "$LOG_FILE" ]; then
+    echo "Error: Test log file not found: $LOG_FILE"
+    exit 2
+fi
+
+# Run the perl script and capture its exit code
+perl "${SCRIPT_DIR}/parse-results.pl" "$LOG_FILE"
+perl_exit_code=$?
+
+# Check that a results file was generated, then source it
+if [ ! -f test_results.txt ]; then
+    echo "Error: No results file generated"
+    exit 2
+fi
+# shellcheck disable=SC1091
+source test_results.txt
+
+# Publish the results to GitHub Actions when GITHUB_OUTPUT is set
+if [ -n "${GITHUB_OUTPUT:-}" ]; then
+    {
+        echo "status=${STATUS}"
+        echo "total_tests=${TOTAL_TESTS}"
+        echo "passed_tests=${PASSED_TESTS}"
+        echo "failed_tests=${FAILED_TESTS}"
+        echo "ignored_tests=${IGNORED_TESTS}"
+        echo "failed_test_names=${FAILED_TEST_NAMES:-}"
+        echo "ignored_test_names=${IGNORED_TEST_NAMES:-}"
+    } >> "${GITHUB_OUTPUT}"
+fi
+
+# Clean up the temporary results file
+rm -f test_results.txt
+
+# Return the perl script's exit code
+exit $perl_exit_code
diff --git a/devops/build/automation/cloudberry/scripts/test-cloudberry.sh b/devops/build/automation/cloudberry/scripts/test-cloudberry.sh
new file mode 100755
index 00000000000..411f16ca625
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/test-cloudberry.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: test-cloudberry.sh
+# Description: Executes Apache Cloudberry test suite using specified
+#              make target. Supports different test types through make
+#              target configuration. Sources Cloudberry environment
+#              before running tests.
+#
+# Required Environment Variables:
+#   MAKE_TARGET    - Make target to execute (e.g., installcheck-world)
+#   MAKE_DIRECTORY - Directory where make command will be executed
+#   MAKE_NAME      - Name of the make operation (for logging)
+#
+# Optional Environment Variables:
+#   LOG_DIR   - Directory for logs (defaults to build-logs)
+#   PGOPTIONS - PostgreSQL server options
+#
+# Usage:
+#   Export required variables:
+#     export MAKE_TARGET=installcheck-world
+#     export MAKE_DIRECTORY="/path/to/make/dir"
+#     export MAKE_NAME="Install Check"
+#   Then run:
+#     ./test-cloudberry.sh
+#
+# Exit Codes:
+#   0 - All tests passed successfully
+#   1 - Environment setup failed (missing required variables, environment sourcing failed)
+#   2 - Test execution failed (make command returned error)
+#
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Source common utilities
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "${SCRIPT_DIR}/cloudberry-utils.sh"
+
+# Define log directory and files
+export LOG_DIR="build-logs"
+TEST_LOG="${LOG_DIR}/test.log"
+
+# Initialize environment
+init_environment "Cloudberry Test Script" "${TEST_LOG}"
+
+# Source Cloudberry environment
+log_section "Environment Setup"
+source_cloudberry_env || exit 1
+log_section_end "Environment Setup"
+
+echo "MAKE_TARGET: ${MAKE_TARGET}"
+echo "MAKE_DIRECTORY: ${MAKE_DIRECTORY}"
+echo "PGOPTIONS: ${PGOPTIONS:-}"
+
+# Execute specified target
+log_section "${MAKE_NAME}"
+execute_cmd make ${MAKE_TARGET} ${MAKE_DIRECTORY} || exit 2
+log_section_end "${MAKE_NAME}"
+
+# Log completion
+log_completion "Cloudberry Test Script" "${TEST_LOG}"
+exit 0
diff --git a/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh b/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh
new file mode 100755
index 00000000000..f7bc120bd08
--- /dev/null
+++ b/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+#
+# Script: unittest-cloudberry.sh
+# Description: Executes unit tests for Apache Cloudberry from source
+#              code. Runs the 'unittest-check' make target and logs
+#              results. Tests are executed against the compiled source
+#              without requiring a full installation.
+#
+# Required Environment Variables:
+#   SRC_DIR - Root source directory
+#
+# Optional Environment Variables:
+#   LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs)
+#
+# Usage:
+#   ./unittest-cloudberry.sh
+#
+# Exit Codes:
+#   0 - All unit tests passed successfully
+#   1 - Environment setup failed (missing SRC_DIR, LOG_DIR creation failed)
+#   2 - Unit test execution failed
+#
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Source common utilities
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "${SCRIPT_DIR}/cloudberry-utils.sh"
+
+# Define log directory and files (honor a caller-supplied LOG_DIR)
+export LOG_DIR="${LOG_DIR:-${SRC_DIR}/build-logs}"
+UNITTEST_LOG="${LOG_DIR}/unittest.log"
+
+# Initialize environment
+init_environment "Cloudberry Unittest Script" "${UNITTEST_LOG}"
+
+# Set environment
+log_section "Environment Setup"
+export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:${LD_LIBRARY_PATH:-}
+log_section_end "Environment Setup"
+
+# Unittest process
+log_section "Unittest Process"
+execute_cmd make --directory ${SRC_DIR} unittest-check || exit 2
+log_section_end "Unittest Process"
+
+# Log completion
+log_completion "Cloudberry Unittest Script" "${UNITTEST_LOG}"
+exit 0
diff --git a/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec b/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec
new file mode 100644
index 00000000000..f7ca334a69e
--- /dev/null
+++ b/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+%define cloudberry_install_dir /usr/local/cloudberry-db
+
+# Build conditionals for optional debug packaging
+# Default to non-debug build
+%bcond_with debug
+
+# Conditional stripping based on debug flag
+%if %{with debug}
+%define __os_install_post %{nil}
+%define __strip /bin/true
+%endif
+
+Name:           apache-cloudberry-db-incubating
+Version:        %{version}
+# Append a .debug marker to the release for debug builds
+%if %{with debug}
+Release:        %{release}.debug%{?dist}
+%else
+Release:        %{release}%{?dist}
+%endif
+Summary:        High-performance, open-source data warehouse based on PostgreSQL/Greenplum
+
+License:        ASL 2.0
+URL:            https://cloudberry.apache.org
+Vendor:         Apache Cloudberry (incubating)
+Group:          Applications/Databases
+Prefix:         %{cloudberry_install_dir}
+
+# Disabled as we are shipping Go programs (e.g.
gpbackup) +%define _missing_build_ids_terminate_build 0 + +# Disable debugsource files +%define _debugsource_template %{nil} + +# List runtime dependencies + +Requires: bash +Requires: iproute +Requires: iputils +Requires: openssh +Requires: openssh-clients +Requires: openssh-server +Requires: rsync + +%if 0%{?rhel} == 8 +Requires: apr +Requires: audit +Requires: bzip2 +Requires: keyutils +Requires: libcurl +Requires: libevent +Requires: libidn2 +Requires: libselinux +Requires: libstdc++ +Requires: libuuid +Requires: libuv +Requires: libxml2 +Requires: libyaml +Requires: libzstd +Requires: lz4 +Requires: openldap +Requires: pam +Requires: perl +Requires: python3 +Requires: readline +%endif + +%if 0%{?rhel} == 9 +Requires: apr +Requires: bzip2 +Requires: glibc +Requires: keyutils +Requires: libcap +Requires: libcurl +Requires: libidn2 +Requires: libpsl +Requires: libssh +Requires: libstdc++ +Requires: libxml2 +Requires: libyaml +Requires: libzstd +Requires: lz4 +Requires: openldap +Requires: pam +Requires: pcre2 +Requires: perl +Requires: readline +Requires: xz +%endif + +%description + +Apache Cloudberry (incubating) is an advanced, open-source, massively +parallel processing (MPP) data warehouse developed from PostgreSQL and +Greenplum. It is designed for high-performance analytics on +large-scale data sets, offering powerful analytical capabilities and +enhanced security features. + +Key Features: + +- Massively parallel processing for optimized performance +- Advanced analytics for complex data processing +- Integration with ETL and BI tools +- Compatibility with multiple data sources and formats +- Enhanced security features + +Apache Cloudberry supports both batch processing and real-time data +warehousing, making it a versatile solution for modern data +environments. + +Apache Cloudberry (incubating) is an effort undergoing incubation at +the Apache Software Foundation (ASF), sponsored by the Apache +Incubator PMC. + +Incubation is required of all newly accepted projects until a further +review indicates that the infrastructure, communications, and decision +making process have stabilized in a manner consistent with other +successful ASF projects. + +While incubation status is not necessarily a reflection of the +completeness or stability of the code, it does indicate that the +project has yet to be fully endorsed by the ASF. 
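Side note on the debug toggle: the `%bcond_with debug` conditional defined near the top of this spec is what `build-rpm.sh` (added later in this patch) drives with its `--with-debug` option. A minimal sketch of the equivalent direct `rpmbuild` invocations, assuming the spec has been linked into `~/rpmbuild/SPECS` as the workflow does (the version and release values are illustrative):

```bash
# Default build: %{with debug} is false, so binaries are stripped and
# Release expands to "1%{?dist}".
rpmbuild -bb ~/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec \
  --define "version 1.6.0" --define "release 1"

# Debug build: --with debug flips the bcond; the __os_install_post and
# __strip overrides then skip stripping, and Release gains a ".debug" marker.
rpmbuild -bb ~/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec \
  --define "version 1.6.0" --define "release 1" --with debug
```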
+
+%prep
+# No prep needed for binary RPM
+
+%build
+# No build needed for binary RPM
+
+%install
+rm -rf %{buildroot}
+
+# Create the versioned directory
+mkdir -p %{buildroot}%{cloudberry_install_dir}-%{version}
+
+cp -R %{cloudberry_install_dir}/* %{buildroot}%{cloudberry_install_dir}-%{version}
+
+# Create the symbolic link
+ln -sfn %{cloudberry_install_dir}-%{version} %{buildroot}%{cloudberry_install_dir}
+
+%files
+%{prefix}-%{version}
+%{prefix}
+
+%license %{cloudberry_install_dir}-%{version}/LICENSE
+
+%debug_package
+
+%post
+# Change ownership to gpadmin:gpadmin if the gpadmin user exists
+if id "gpadmin" &>/dev/null; then
+  chown -R gpadmin:gpadmin %{cloudberry_install_dir}-%{version}
+  chown gpadmin:gpadmin %{cloudberry_install_dir}
+fi
+
+%postun
+if [ $1 -eq 0 ] ; then
+   if [ "$(readlink -f "%{cloudberry_install_dir}")" == "%{cloudberry_install_dir}-%{version}" ]; then
+      unlink "%{cloudberry_install_dir}" || true
+   fi
+fi
diff --git a/devops/build/packaging/rpm/apache-cloudberry-hll-incubating.spec b/devops/build/packaging/rpm/apache-cloudberry-hll-incubating.spec
new file mode 100644
index 00000000000..d13c0a7b77a
--- /dev/null
+++ b/devops/build/packaging/rpm/apache-cloudberry-hll-incubating.spec
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+%global cloudberry_version %{?_cloudberry_version}%{!?_cloudberry_version:1.6}
+%global cloudberry_install_dir /usr/local/cloudberry-db
+
+Name:           apache-cloudberry-hll-incubating
+Version:        2.18.0
+Release:        %{?release}%{!?release:1}
+Summary:        HyperLogLog extension for Cloudberry Database %{cloudberry_version}
+License:        ASL 2.0
+URL:            https://github.com/citusdata/postgresql-hll
+Vendor:         Apache Cloudberry (incubating)
+Group:          Applications/Databases
+BuildArch:      x86_64
+Requires:       apache-cloudberry-db-incubating >= %{cloudberry_version}
+Prefix:         %{cloudberry_install_dir}
+
+%description
+HLL is an open-source PostgreSQL extension (compatible with Apache
+Cloudberry (incubating) %{cloudberry_version}) adding HyperLogLog data
+structures as a native data type. HyperLogLog is a fixed-size,
+set-like structure used for distinct value counting with tunable
+precision.
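Since the %post scriptlet below only prints a reminder, enabling the extension remains a per-database step. A hedged usage sketch against a running cluster (the database and table names are illustrative; `hll_hash_integer`, `hll_add_agg`, and `hll_cardinality` come from the upstream postgresql-hll API):

```bash
# Illustrative only: enable hll, then build and query a distinct-count sketch.
psql -d gpadmin -c "CREATE EXTENSION hll;"
psql -d gpadmin <<'SQL'
CREATE TABLE daily_uniques (day date, users hll);
INSERT INTO daily_uniques
  SELECT current_date, hll_add_agg(hll_hash_integer(i))
  FROM generate_series(1, 10000) AS i;
SELECT day, hll_cardinality(users) AS approx_uniques FROM daily_uniques;
SQL
```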
+
+%prep
+# No prep needed for binary RPM
+
+%build
+# No build needed for binary RPM
+
+%install
+mkdir -p %{buildroot}%{prefix}/lib/postgresql \
+         %{buildroot}%{prefix}/share/postgresql/extension
+
+cp -R %{cloudberry_install_dir}/lib/postgresql/hll.so \
+      %{buildroot}%{prefix}/lib/postgresql/hll.so
+
+cp -R %{cloudberry_install_dir}/share/postgresql/extension/hll* \
+      %{buildroot}%{prefix}/share/postgresql/extension
+
+%files
+%{prefix}/lib/postgresql/hll.so
+%{prefix}/share/postgresql/extension/hll--*.sql
+%{prefix}/share/postgresql/extension/hll.control
+
+%post
+echo "HLL extension for Cloudberry Database %{cloudberry_version} has been installed in %{prefix}."
+echo "To enable it in a database, run:"
+echo "  CREATE EXTENSION hll;"
+
+%postun
+echo "HLL extension for Cloudberry Database %{cloudberry_version} has been removed from %{prefix}."
+echo "You may need to manually clean up any database objects that were using the extension."
diff --git a/devops/build/packaging/rpm/apache-cloudberry-pgvector-incubating.spec b/devops/build/packaging/rpm/apache-cloudberry-pgvector-incubating.spec
new file mode 100644
index 00000000000..6b0cbd517e1
--- /dev/null
+++ b/devops/build/packaging/rpm/apache-cloudberry-pgvector-incubating.spec
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+%global cloudberry_version %{?_cloudberry_version}%{!?_cloudberry_version:1.6}
+%global cloudberry_install_dir /usr/local/cloudberry-db
+%global pgvector_version %{?_pgvector_version}%{!?_pgvector_version:0.5.1}
+
+Name:           apache-cloudberry-pgvector-incubating
+Version:        %{pgvector_version}
+Release:        %{?release}%{!?release:1}
+Summary:        pgvector extension for Cloudberry Database %{cloudberry_version}
+License:        PostgreSQL
+URL:            https://github.com/pgvector/pgvector
+Vendor:         Apache Cloudberry (incubating)
+Group:          Applications/Databases
+BuildArch:      x86_64
+Requires:       apache-cloudberry-db-incubating >= %{cloudberry_version}
+Prefix:         %{cloudberry_install_dir}
+
+%description
+pgvector is an open-source vector similarity search extension for
+PostgreSQL and Cloudberry Database %{cloudberry_version}. It provides
+vector data types and vector similarity search functions, allowing for
+efficient similarity search operations on high-dimensional data.
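The same per-database step applies to pgvector. A hedged sketch (names illustrative; the `vector` type and the `<->` L2-distance operator are upstream pgvector syntax):

```bash
# Illustrative only: enable pgvector and run a nearest-neighbor query.
psql -d gpadmin -c "CREATE EXTENSION vector;"
psql -d gpadmin <<'SQL'
CREATE TABLE items (id bigserial, embedding vector(3));
INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]');
SELECT id, embedding FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 1;
SQL
```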
+
+%prep
+# No prep needed for binary RPM
+
+%build
+# No build needed for binary RPM
+
+%install
+mkdir -p %{buildroot}%{prefix}/include/postgresql/server/extension/vector \
+         %{buildroot}%{prefix}/lib/postgresql \
+         %{buildroot}%{prefix}/share/postgresql/extension
+cp -R %{cloudberry_install_dir}/include/postgresql/server/extension/vector/* \
+      %{buildroot}%{prefix}/include/postgresql/server/extension/vector
+cp -R %{cloudberry_install_dir}/lib/postgresql/vector.so \
+      %{buildroot}%{prefix}/lib/postgresql/vector.so
+cp -R %{cloudberry_install_dir}/share/postgresql/extension/vector* \
+      %{buildroot}%{prefix}/share/postgresql/extension
+
+%files
+%{prefix}/include/postgresql/server/extension/vector/*
+%{prefix}/lib/postgresql/vector.so
+%{prefix}/share/postgresql/extension/vector--*.sql
+%{prefix}/share/postgresql/extension/vector.control
+
+%post
+echo "pgvector extension version %{version} for Cloudberry Database %{cloudberry_version} has been installed in %{prefix}."
+echo "To enable it in a database, run:"
+echo "  CREATE EXTENSION vector;"
+
+%postun
+echo "pgvector extension version %{version} for Cloudberry Database %{cloudberry_version} has been removed from %{prefix}."
+echo "You may need to manually clean up any database objects that were using the extension."
diff --git a/devops/build/packaging/rpm/build-rpm.sh b/devops/build/packaging/rpm/build-rpm.sh
new file mode 100755
index 00000000000..ceb7d18d392
--- /dev/null
+++ b/devops/build/packaging/rpm/build-rpm.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Script Name: build-rpm.sh
+#
+# Description:
+# This script automates the process of building an RPM package using a specified
+# version and release number. It ensures that the necessary tools are installed
+# and that the spec file exists before attempting to build the RPM. The script
+# also includes error handling to provide meaningful feedback in case of failure.
+#
+# Usage:
+# ./build-rpm.sh -v <version> [-r <release>] [-d|--with-debug] [-h] [-n|--dry-run]
+#
+# Options:
+# -v, --version <version> : Specify the version (required)
+# -r, --release <release> : Specify the release (optional, default is 1)
+# -d, --with-debug        : Build with debug symbols (optional)
+# -h, --help              : Display this help and exit
+# -n, --dry-run           : Show what would be done, without making any changes
+#
+# Example:
+# ./build-rpm.sh -v 1.5.5 -r 2          # Build with version 1.5.5 and release 2
+# ./build-rpm.sh -v 1.5.5               # Build with version 1.5.5 and default release 1
+# ./build-rpm.sh -v 1.5.5 --with-debug  # Build with debug symbols
+#
+# Prerequisites:
+# - The rpm-build package must be installed (provides the rpmbuild command).
+# - The spec file must exist at ~/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec.
+#
+# Error Handling:
+# The script includes checks to ensure:
+# - The version option (-v or --version) is provided.
+# - The necessary commands are available.
+# - The spec file exists at the specified location.
+# If any of these checks fail, the script exits with an appropriate error message.

+# Enable strict mode for better error handling
+set -euo pipefail
+
+# Default values
+VERSION=""
+RELEASE="1"
+DEBUG_BUILD=false
+
+# Function to display usage information
+usage() {
+    echo "Usage: $0 -v <version> [-r <release>] [-d|--with-debug] [-h] [-n|--dry-run]"
+    echo "  -v, --version <version> : Specify the version (required)"
+    echo "  -r, --release <release> : Specify the release (optional, default is 1)"
+    echo "  -d, --with-debug        : Build with debug symbols (optional)"
+    echo "  -h, --help              : Display this help and exit"
+    echo "  -n, --dry-run           : Show what would be done, without making any changes"
+    exit 1
+}
+
+# Function to check if required commands are available
+check_commands() {
+    local cmds=("rpmbuild")
+    for cmd in "${cmds[@]}"; do
+        if ! command -v "$cmd" &> /dev/null; then
+            echo "Error: Required command '$cmd' not found. Please install it before running the script."
+            exit 1
+        fi
+    done
+}
+
+# Parse options
+while [[ "$#" -gt 0 ]]; do
+    case $1 in
+        -v|--version)
+            VERSION="$2"
+            shift 2
+            ;;
+        -r|--release)
+            RELEASE="$2"
+            shift 2
+            ;;
+        -d|--with-debug)
+            DEBUG_BUILD=true
+            shift
+            ;;
+        -h|--help)
+            usage
+            ;;
+        -n|--dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        *)
+            echo "Error: Unknown option: $1" >&2
+            usage
+            ;;
+    esac
+done
+
+# Ensure version is provided
+if [ -z "$VERSION" ]; then
+    echo "Error: Version (-v or --version) is required."
+    usage
+fi
+
+# Check if required commands are available
+check_commands
+
+# Define the spec file path
+SPEC_FILE=~/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec
+
+# Check if the spec file exists
+if [ ! -f "$SPEC_FILE" ]; then
+    echo "Error: Spec file not found at $SPEC_FILE."
+    exit 1
+fi
+
+# Build the rpmbuild command based on options
+RPMBUILD_CMD="rpmbuild -bb \"$SPEC_FILE\" --define \"version $VERSION\" --define \"release $RELEASE\""
+if [ "$DEBUG_BUILD" = true ]; then
+    RPMBUILD_CMD+=" --with debug"
+fi
+
+# Dry-run mode
+if [ "${DRY_RUN:-false}" = true ]; then
+    echo "Dry-run mode: This is what would be done:"
+    echo "  $RPMBUILD_CMD"
+    exit 0
+fi
+
+# Run rpmbuild with the provided options
+echo "Building RPM with Version: $VERSION, Release: $RELEASE$([ "$DEBUG_BUILD" = true ] && echo ", Debug: enabled")..."
+if ! eval "$RPMBUILD_CMD"; then
+    echo "Error: rpmbuild failed."
+    exit 1
+fi
+
+# Print completion message
+echo "RPM build completed successfully with Version: $VERSION, Release: $RELEASE"
diff --git a/devops/build/packaging/rpm/cloudberry-dev-repo.spec b/devops/build/packaging/rpm/cloudberry-dev-repo.spec
new file mode 100644
index 00000000000..d2a06defda7
--- /dev/null
+++ b/devops/build/packaging/rpm/cloudberry-dev-repo.spec
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +Name: cloudberry-dev-repo +Version: 1.0 +Release: 1%{?dist} +Summary: Cloudberry Database Repository Configuration +License: ASL 2.0 +Group: Applications/Databases +URL: https://cloudberrydb.org +Vendor: Cloudberry Open Source +BuildArch: noarch + +%description +This package configures the Cloudberry Database repository on your +system. Cloudberry Database is an open-source project aimed at +providing a scalable, high-performance SQL database for +analytics. This repository provides access to the latest RPM packages +for Cloudberry Database, allowing you to easily install and stay +up-to-date with the latest developments. + +%install +mkdir -p %{buildroot}%{_sysconfdir}/yum.repos.d/ +cat > %{buildroot}%{_sysconfdir}/yum.repos.d/cloudberry-dev.repo < /dev/null + +# -------------------------------------------------------------------- +# Copy Configuration Files and Setup the Environment +# -------------------------------------------------------------------- +# - Copy custom configuration files from the build context to /tmp/. +# - Apply custom system limits and timezone. +# - Create and configure the 'gpadmin' user with sudo privileges. +# - Set up SSH for password-based authentication. +# - Generate locale and set the default locale to en_US.UTF-8. +# -------------------------------------------------------------------- +COPY ./configs/* /tmp/ + +RUN cp /tmp/90-cbdb-limits /etc/security/limits.d/90-cbdb-limits && \ + sed -i.bak -r 's/^(session\s+required\s+pam_limits.so)/#\1/' /etc/pam.d/* && \ + cat /usr/share/zoneinfo/${TIMEZONE_VAR} > /etc/localtime && \ + chmod 777 /tmp/init_system.sh && \ + /usr/sbin/groupadd gpadmin && \ + /usr/sbin/useradd gpadmin -g gpadmin -G wheel && \ + setcap cap_net_raw+ep /usr/bin/ping && \ + echo 'gpadmin ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/90-gpadmin && \ + echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cbdb/cloudberry-env.sh ]; then\n source /usr/local/cbdb/cloudberry-env.sh\nfi' >> /home/gpadmin/.bashrc && \ + ssh-keygen -A && \ + echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ + localedef -i en_US -f UTF-8 en_US.UTF-8 && \ + echo "LANG=en_US.UTF-8" | tee /etc/locale.conf && \ + dnf clean all # Final cleanup to remove unnecessary files + +# Install testinfra via pip +RUN pip3 install pytest-testinfra + +# Example: Copying test files into the container +COPY tests /tests + +# -------------------------------------------------------------------- +# Set the Default User and Command +# -------------------------------------------------------------------- +# The default user is set to 'gpadmin', and the container starts by +# running the init_system.sh script. The container also mounts the +# /sys/fs/cgroup volume for systemd compatibility. 
+# --------------------------------------------------------------------
+USER gpadmin
+
+VOLUME [ "/sys/fs/cgroup" ]
+CMD ["bash","-c","/tmp/init_system.sh"]
diff --git a/devops/deploy/docker/build/rocky8/configs/90-cbdb-limits b/devops/deploy/docker/build/rocky8/configs/90-cbdb-limits
new file mode 100644
index 00000000000..474957c42f6
--- /dev/null
+++ b/devops/deploy/docker/build/rocky8/configs/90-cbdb-limits
@@ -0,0 +1,32 @@
+# /etc/security/limits.d/90-cbdb-limits
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+
+# Core dump file size limits for gpadmin
+gpadmin soft core unlimited
+gpadmin hard core unlimited
+
+# Open file limits for gpadmin
+gpadmin soft nofile 524288
+gpadmin hard nofile 524288
+
+# Process limits for gpadmin
+gpadmin soft nproc 131072
+gpadmin hard nproc 131072
diff --git a/devops/deploy/docker/build/rocky8/configs/gpinitsystem.conf b/devops/deploy/docker/build/rocky8/configs/gpinitsystem.conf
new file mode 100644
index 00000000000..3c0fb48b58c
--- /dev/null
+++ b/devops/deploy/docker/build/rocky8/configs/gpinitsystem.conf
@@ -0,0 +1,87 @@
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+# gpinitsystem Configuration File for Apache Cloudberry
+# --------------------------------------------------------------------
+# This configuration file is used to initialize an Apache Cloudberry
+# cluster. It defines the settings for the coordinator, primary segments,
+# and mirrors, as well as other important configuration options.
+# --------------------------------------------------------------------
+
+# Segment prefix - This prefix is used for naming the segment directories.
+# For example, the primary segment directories will be named gpseg0, gpseg1, etc.
+SEG_PREFIX=gpseg + +# Coordinator port - The port number where the coordinator will listen. +# This is the port used by clients to connect to the database. +COORDINATOR_PORT=5432 + +# Coordinator hostname - The hostname of the machine where the coordinator +# will be running. The $(hostname) command will automatically insert the +# hostname of the current machine. +COORDINATOR_HOSTNAME=$(hostname) + +# Coordinator data directory - The directory where the coordinator's data +# will be stored. This directory should have enough space to store metadata +# and system catalogs. +COORDINATOR_DIRECTORY=/data1/coordinator + +# Base port for primary segments - The starting port number for the primary +# segments. Each primary segment will use a unique port number starting from +# this base. +PORT_BASE=6000 + +# Primary segment data directories - An array specifying the directories where +# the primary segment data will be stored. Each directory corresponds to a +# primary segment. In this case, two primary segments will be created in the +# same directory. +declare -a DATA_DIRECTORY=(/data1/primary /data1/primary) + +# Base port for mirror segments - The starting port number for the mirror +# segments. Each mirror segment will use a unique port number starting from +# this base. +MIRROR_PORT_BASE=7000 + +# Mirror segment data directories - An array specifying the directories where +# the mirror segment data will be stored. Each directory corresponds to a +# mirror segment. In this case, two mirror segments will be created in the +# same directory. +declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror) + +# Trusted shell - The shell program used for remote execution. Cloudberry uses +# SSH to run commands on other machines in the cluster. 'ssh' is the default. +TRUSTED_SHELL=ssh + +# Database encoding - The character set encoding to be used by the database. +# 'UNICODE' is a common choice, especially for internationalization. +ENCODING=UNICODE + +# Default database name - The name of the default database to be created during +# initialization. This is also the default database that the gpadmin user will +# connect to. +DATABASE_NAME=gpadmin + +# Machine list file - A file containing the list of hostnames where the primary +# segments will be created. Each line in the file represents a different machine. +# This file is critical for setting up the cluster across multiple nodes. +MACHINE_LIST_FILE=/home/gpadmin/hostfile_gpinitsystem + +# -------------------------------------------------------------------- +# End of gpinitsystem Configuration File +# -------------------------------------------------------------------- diff --git a/devops/deploy/docker/build/rocky8/configs/init_system.sh b/devops/deploy/docker/build/rocky8/configs/init_system.sh new file mode 100755 index 00000000000..cc2d5991b9d --- /dev/null +++ b/devops/deploy/docker/build/rocky8/configs/init_system.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# -------------------------------------------------------------------- +# Container Initialization Script +# -------------------------------------------------------------------- +# This script sets up the environment inside the Docker container for +# the Apache Cloudberry Build Environment. It performs the following +# tasks: +# +# 1. Verifies that the container is running with the expected hostname. +# 2. Starts the SSH daemon to allow SSH access to the container. +# 3. Configures passwordless SSH access for the 'gpadmin' user. +# 4. Displays a welcome banner and system information. +# 5. Starts an interactive bash shell. +# +# This script is intended to be used as an entrypoint or initialization +# script for the Docker container. +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Check if the hostname is 'cdw' +# -------------------------------------------------------------------- +# The script checks if the container's hostname is set to 'cdw'. This is +# a requirement for this environment, and if the hostname does not match, +# the script will exit with an error message. This ensures consistency +# across different environments. +# -------------------------------------------------------------------- +if [ "$(hostname)" != "cdw" ]; then + echo "Error: This container must be run with the hostname 'cdw'." + echo "Use the following command: docker run -h cdw ..." + exit 1 +fi + +# -------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# -------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. If the SSH +# daemon fails to start, the script exits with an error. +# -------------------------------------------------------------------- +if ! sudo /usr/sbin/sshd; then + echo "Failed to start SSH daemon" >&2 + exit 1 +fi + +# -------------------------------------------------------------------- +# Remove /run/nologin to allow logins +# -------------------------------------------------------------------- +# The /run/nologin file, if present, prevents users from logging into +# the system. This file is removed to ensure that users can log in via SSH. +# -------------------------------------------------------------------- +sudo rm -rf /run/nologin + +# -------------------------------------------------------------------- +# Configure passwordless SSH access for 'gpadmin' user +# -------------------------------------------------------------------- +# The script sets up SSH key-based authentication for the 'gpadmin' user, +# allowing passwordless SSH access. It generates a new SSH key pair if one +# does not already exist, and configures the necessary permissions. +# -------------------------------------------------------------------- +mkdir -p /home/gpadmin/.ssh +chmod 700 /home/gpadmin/.ssh + +if [ ! 
-f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1 +fi + +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +chmod 600 /home/gpadmin/.ssh/authorized_keys + +# Add the container's hostname to the known_hosts file to avoid SSH warnings +ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null + +# Change to the home directory of the current user +cd $HOME + +# -------------------------------------------------------------------- +# Display a Welcome Banner +# -------------------------------------------------------------------- +# The following ASCII art and welcome message are displayed when the +# container starts. This banner provides a visual indication that the +# container is running in the Apache Cloudberry Build Environment. +# -------------------------------------------------------------------- +cat <<-'EOF' + +====================================================================== + + ++++++++++ ++++++ + ++++++++++++++ +++++++ + ++++ +++++ ++++ + ++++ +++++++++ + =+==== =============+ + ======== =====+ ===== + ==== ==== ==== ==== + ==== === === ==== + ==== === === ==== + ==== === ==-- === + ===== ===== -- ==== + ===================== ====== + ============================ + =-----= + ____ _ _ _ + / ___|| | ___ _ _ __| || |__ ___ _ __ _ __ _ _ + | | | | / _ \ | | | | / _` || '_ \ / _ \| '__|| '__|| | | | + | |___ | || (_) || |_| || (_| || |_) || __/| | | | | |_| | + \____||_| \____ \__,_| \__,_||_.__/ \___||_| |_| \__, | + |___/ +---------------------------------------------------------------------- + +EOF + +# -------------------------------------------------------------------- +# Display System Information +# -------------------------------------------------------------------- +# The script sources the /etc/os-release file to retrieve the operating +# system name and version. It then displays the following information: +# - OS name and version +# - Current user +# - Container hostname +# - IP address +# - CPU model name and number of cores +# - Total memory available +# This information is useful for users to understand the environment they +# are working in. +# -------------------------------------------------------------------- +source /etc/os-release + +# First, create the CPU info detection function +get_cpu_info() { + ARCH=$(uname -m) + if [ "$ARCH" = "x86_64" ]; then + lscpu | grep 'Model name:' | awk '{print substr($0, index($0,$3))}' + elif [ "$ARCH" = "aarch64" ]; then + VENDOR=$(lscpu | grep 'Vendor ID:' | awk '{print $3}') + if [ "$VENDOR" = "Apple" ] || [ "$VENDOR" = "0x61" ]; then + echo "Apple Silicon ($ARCH)" + else + if [ -f /proc/cpuinfo ]; then + IMPL=$(grep "CPU implementer" /proc/cpuinfo | head -1 | awk '{print $3}') + PART=$(grep "CPU part" /proc/cpuinfo | head -1 | awk '{print $3}') + if [ ! -z "$IMPL" ] && [ ! -z "$PART" ]; then + echo "ARM $ARCH (Implementer: $IMPL, Part: $PART)" + else + echo "ARM $ARCH" + fi + else + echo "ARM $ARCH" + fi + fi + else + echo "Unknown architecture: $ARCH" + fi +} + +cat <<-EOF +Welcome to the Apache Cloudberry Build Environment! + +Container OS ........ : $NAME $VERSION +User ................ : $(whoami) +Container hostname .. : $(hostname) +IP Address .......... : $(hostname -I | awk '{print $1}') +CPU Info ............ : $(get_cpu_info) +CPU(s) .............. : $(nproc) +Memory .............. 
: $(free -h | grep Mem: | awk '{print $2}') total +====================================================================== + +EOF + +# -------------------------------------------------------------------- +# Start an interactive bash shell +# -------------------------------------------------------------------- +# Finally, the script starts an interactive bash shell to keep the +# container running and allow the user to interact with the environment. +# -------------------------------------------------------------------- +/bin/bash diff --git a/devops/deploy/docker/build/rocky8/tests/requirements.txt b/devops/deploy/docker/build/rocky8/tests/requirements.txt new file mode 100644 index 00000000000..b9711eddac5 --- /dev/null +++ b/devops/deploy/docker/build/rocky8/tests/requirements.txt @@ -0,0 +1,3 @@ +testinfra +pytest-testinfra +paramiko diff --git a/devops/deploy/docker/build/rocky8/tests/testinfra/test_cloudberry_db_env.py b/devops/deploy/docker/build/rocky8/tests/testinfra/test_cloudberry_db_env.py new file mode 100644 index 00000000000..c484c5b9408 --- /dev/null +++ b/devops/deploy/docker/build/rocky8/tests/testinfra/test_cloudberry_db_env.py @@ -0,0 +1,126 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- + +import testinfra + +def test_installed_packages(host): + """ + Test if the essential packages are installed. + """ + packages = [ + "epel-release", + "git", + "the_silver_searcher", + "htop", + "bison", + "gcc", + "gcc-c++", + "glibc-langpack-en", + "glibc-locale-source", + "openssh-clients", + "openssh-server", + "sudo", + "rsync", + "wget", + "openssl-devel", + "python36-devel", + "readline-devel", + "zlib-devel", + "libcurl-devel", + "libevent-devel", + "libxml2-devel", + "libuuid-devel", + "libzstd-devel", + "lz4", + "openldap-devel", + "libuv-devel", + "libyaml-devel" + ] + for package in packages: + pkg = host.package(package) + assert pkg.is_installed + + +def test_user_gpadmin_exists(host): + """ + Test if the gpadmin user exists and is configured properly. + """ + user = host.user("gpadmin") + assert user.exists + assert "wheel" in user.groups + + +def test_ssh_service(host): + """ + Test if SSH service is configured correctly. + """ + sshd_config = host.file("/etc/ssh/sshd_config") + assert sshd_config.exists + + +def test_locale_configured(host): + """ + Test if the locale is configured correctly. + """ + locale_conf = host.file("/etc/locale.conf") + assert locale_conf.exists + assert locale_conf.contains("LANG=en_US.UTF-8") + + +def test_timezone(host): + """ + Test if the timezone is configured correctly. 
+ """ + localtime = host.file("/etc/localtime") + assert localtime.exists + + +def test_system_limits_configured(host): + """ + Test if the custom system limits are applied. + """ + limits_file = host.file("/etc/security/limits.d/90-cbdb-limits") + assert limits_file.exists + + +def test_init_system_script(host): + """ + Test if the init_system.sh script is present and executable. + """ + script = host.file("/tmp/init_system.sh") + assert script.exists + assert script.mode == 0o777 + + +def test_custom_configuration_files(host): + """ + Test if custom configuration files are correctly copied. + """ + config_file = host.file("/tmp/90-cbdb-limits") + assert config_file.exists + + +def test_locale_generated(host): + """ + Test if the en_US.UTF-8 locale is correctly generated. + """ + locale = host.run("locale -a | grep en_US.utf8") + assert locale.exit_status == 0 + assert "en_US.utf8" in locale.stdout diff --git a/devops/deploy/docker/build/rocky9/Dockerfile b/devops/deploy/docker/build/rocky9/Dockerfile new file mode 100644 index 00000000000..1da729a1969 --- /dev/null +++ b/devops/deploy/docker/build/rocky9/Dockerfile @@ -0,0 +1,215 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# +# Apache Cloudberry (incubating) is an effort undergoing incubation at +# the Apache Software Foundation (ASF), sponsored by the Apache +# Incubator PMC. +# +# Incubation is required of all newly accepted projects until a +# further review indicates that the infrastructure, communications, +# and decision making process have stabilized in a manner consistent +# with other successful ASF projects. +# +# While incubation status is not necessarily a reflection of the +# completeness or stability of the code, it does indicate that the +# project has yet to be fully endorsed by the ASF. +# +# -------------------------------------------------------------------- +# Dockerfile for Apache Cloudberry Build Environment +# -------------------------------------------------------------------- +# This Dockerfile sets up a Rocky Linux 9-based container for building +# and developing Apache Cloudberry. It installs necessary system +# utilities, development tools, and configures the environment for SSH +# access and systemd support. +# +# Key Features: +# - Locale setup for en_US.UTF-8 +# - SSH daemon setup for remote access +# - Essential development tools and libraries installation +# - User configuration for 'gpadmin' with sudo privileges +# +# Usage: +# docker build -t cloudberry-db-env . 
+# docker run -h cdw -it cloudberry-db-env +# -------------------------------------------------------------------- + +# Base image: Rocky Linux 9 +FROM rockylinux/rockylinux:9 + +# Argument for configuring the timezone +ARG TIMEZONE_VAR="America/Los_Angeles" + +# Environment variables for locale and user +ENV container=docker +ENV LANG=en_US.UTF-8 +ENV USER=gpadmin + +# -------------------------------------------------------------------- +# Install Development Tools and Utilities +# -------------------------------------------------------------------- +# Install various development tools, system utilities, and libraries +# required for building and running Apache Cloudberry. +# - EPEL repository is enabled for additional packages. +# - Cleanup steps are added to reduce image size after installation. +# -------------------------------------------------------------------- +RUN dnf makecache && \ + dnf install -y \ + epel-release \ + git && \ + dnf config-manager --disable epel-cisco-openh264 && \ + dnf makecache && \ + dnf config-manager --disable epel && \ + dnf install -y --enablerepo=epel \ + the_silver_searcher \ + bat \ + htop && \ + dnf install -y \ + bison \ + cmake3 \ + ed \ + file \ + flex \ + gcc \ + gcc-c++ \ + gdb \ + glibc-langpack-en \ + glibc-locale-source \ + initscripts \ + iproute \ + less \ + lsof \ + m4 \ + net-tools \ + openssh-clients \ + openssh-server \ + perl \ + rpm-build \ + rpmdevtools \ + rsync \ + sudo \ + tar \ + unzip \ + util-linux-ng \ + wget \ + sshpass \ + which && \ + dnf install -y \ + apr-devel \ + bzip2-devel \ + java-11-openjdk \ + java-11-openjdk-devel \ + krb5-devel \ + libcurl-devel \ + libevent-devel \ + libxml2-devel \ + libuuid-devel \ + libzstd-devel \ + lz4 \ + lz4-devel \ + openldap-devel \ + openssl-devel \ + pam-devel \ + perl-ExtUtils-Embed \ + perl-Test-Simple \ + perl-core \ + python3-devel \ + python3-pytest \ + readline-devel \ + zlib-devel && \ + dnf install -y --enablerepo=crb \ + libuv-devel \ + libyaml-devel \ + perl-IPC-Run \ + protobuf-devel && \ + dnf clean all && \ + cd && XERCES_LATEST_RELEASE=3.3.0 && \ + wget -nv "https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + echo "$(curl -sL https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz.sha256)" | sha256sum -c - && \ + tar xf "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz"; rm "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + cd xerces-c-${XERCES_LATEST_RELEASE} && \ + ./configure --prefix=/usr/local/xerces-c && \ + make -j$(nproc) && \ + make install -C ~/xerces-c-${XERCES_LATEST_RELEASE} && \ + rm -rf ~/xerces-c* && \ + cd && GO_VERSION="go1.23.4" && \ + ARCH=$(uname -m) && \ + if [ "${ARCH}" = "aarch64" ]; then \ + GO_ARCH="arm64" && \ + GO_SHA256="16e5017863a7f6071363782b1b8042eb12c6ca4f4cd71528b2123f0a1275b13e"; \ + elif [ "${ARCH}" = "x86_64" ]; then \ + GO_ARCH="amd64" && \ + GO_SHA256="6924efde5de86fe277676e929dc9917d466efa02fb934197bc2eba35d5680971"; \ + else \ + echo "Unsupported architecture: ${ARCH}" && exit 1; \ + fi && \ + GO_URL="https://go.dev/dl/${GO_VERSION}.linux-${GO_ARCH}.tar.gz" && \ + wget -nv "${GO_URL}" && \ + echo "${GO_SHA256} ${GO_VERSION}.linux-${GO_ARCH}.tar.gz" | sha256sum -c - && \ + tar xf "${GO_VERSION}.linux-${GO_ARCH}.tar.gz" && \ + mv go "/usr/local/${GO_VERSION}" && \ + ln -s "/usr/local/${GO_VERSION}" /usr/local/go && \ + rm -f "${GO_VERSION}.linux-${GO_ARCH}.tar.gz" && \ + echo 'export PATH=$PATH:/usr/local/go/bin' | tee -a /etc/profile.d/go.sh > /dev/null + 
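With the toolchain layer in place, the image can be built and smoke-tested as the header's usage notes describe. A hedged sketch that also runs the bundled testinfra checks (the image and container names are illustrative):

```bash
# Build the Rocky Linux 9 image from its build context.
docker build -t cloudberry-build-rocky9 devops/deploy/docker/build/rocky9

# init_system.sh refuses to run unless the hostname is 'cdw'; run detached
# so the container stays up for the tests.
docker run -h cdw -itd --name cdw-build cloudberry-build-rocky9

# Point the testinfra suite at the running container via the docker backend.
pip3 install pytest-testinfra
pytest --hosts=docker://cdw-build \
  devops/deploy/docker/build/rocky9/tests/testinfra/
```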
+# --------------------------------------------------------------------
+# Copy Configuration Files and Setup the Environment
+# --------------------------------------------------------------------
+# - Copy custom configuration files from the build context to /tmp/.
+# - Apply custom system limits and timezone.
+# - Create and configure the 'gpadmin' user with sudo privileges.
+# - Set up SSH for password-based authentication.
+# - Generate locale and set the default locale to en_US.UTF-8.
+# --------------------------------------------------------------------
+
+# Copy configuration files from their respective locations
+COPY ./configs/* /tmp/
+
+RUN cp /tmp/90-cbdb-limits /etc/security/limits.d/90-cbdb-limits && \
+    sed -i.bak -r 's/^(session\s+required\s+pam_limits.so)/#\1/' /etc/pam.d/* && \
+    cat /usr/share/zoneinfo/${TIMEZONE_VAR} > /etc/localtime && \
+    chmod 777 /tmp/init_system.sh && \
+    /usr/sbin/groupadd gpadmin && \
+    /usr/sbin/useradd gpadmin -g gpadmin -G wheel && \
+    setcap cap_net_raw+ep /usr/bin/ping && \
+    echo 'gpadmin ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/90-gpadmin && \
+    echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cbdb/cloudberry-env.sh ]; then\n source /usr/local/cbdb/cloudberry-env.sh\nfi' >> /home/gpadmin/.bashrc && \
+    ssh-keygen -A && \
+    echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \
+    localedef -i en_US -f UTF-8 en_US.UTF-8 && \
+    echo "LANG=en_US.UTF-8" | tee /etc/locale.conf && \
+    dnf clean all  # Final cleanup to remove unnecessary files
+
+# Install testinfra via pip
+RUN pip3 install pytest-testinfra
+
+# Copying test files into the container
+COPY ./tests /tests
+
+# --------------------------------------------------------------------
+# Set the Default User and Command
+# --------------------------------------------------------------------
+# The default user is set to 'gpadmin', and the container starts by
+# running the init_system.sh script. The container also mounts the
+# /sys/fs/cgroup volume for systemd compatibility.
+# --------------------------------------------------------------------
+USER gpadmin
+
+VOLUME [ "/sys/fs/cgroup" ]
+CMD ["bash","-c","/tmp/init_system.sh"]
diff --git a/devops/deploy/docker/build/rocky9/configs/90-cbdb-limits b/devops/deploy/docker/build/rocky9/configs/90-cbdb-limits
new file mode 100644
index 00000000000..474957c42f6
--- /dev/null
+++ b/devops/deploy/docker/build/rocky9/configs/90-cbdb-limits
@@ -0,0 +1,32 @@
+# /etc/security/limits.d/90-cbdb-limits
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+# +# -------------------------------------------------------------------- + +# Core dump file size limits for gpadmin +gpadmin soft core unlimited +gpadmin hard core unlimited + +# Open file limits for gpadmin +gpadmin soft nofile 524288 +gpadmin hard nofile 524288 + +# Process limits for gpadmin +gpadmin soft nproc 131072 +gpadmin hard nproc 131072 diff --git a/devops/deploy/docker/build/rocky9/configs/gpinitsystem.conf b/devops/deploy/docker/build/rocky9/configs/gpinitsystem.conf new file mode 100644 index 00000000000..d4d312231c5 --- /dev/null +++ b/devops/deploy/docker/build/rocky9/configs/gpinitsystem.conf @@ -0,0 +1,89 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# gpinitsystem Configuration File for Apache Cloudberry +# -------------------------------------------------------------------- +# This configuration file is used to initialize an Apache Cloudberry +# cluster. It defines the settings for the coordinator, primary segments, +# and mirrors, as well as other important configuration options. +# -------------------------------------------------------------------- + +# Segment prefix - This prefix is used for naming the segment directories. +# For example, the primary segment directories will be named gpseg0, gpseg1, etc. +SEG_PREFIX=gpseg + +# Coordinator port - The port number where the coordinator will listen. +# This is the port used by clients to connect to the database. +COORDINATOR_PORT=5432 + +# Coordinator hostname - The hostname of the machine where the coordinator +# will be running. The $(hostname) command will automatically insert the +# hostname of the current machine. +COORDINATOR_HOSTNAME=$(hostname) + +# Coordinator data directory - The directory where the coordinator's data +# will be stored. This directory should have enough space to store metadata +# and system catalogs. +COORDINATOR_DIRECTORY=/data1/coordinator + +# Base port for primary segments - The starting port number for the primary +# segments. Each primary segment will use a unique port number starting from +# this base. +PORT_BASE=6000 + +# Primary segment data directories - An array specifying the directories where +# the primary segment data will be stored. Each directory corresponds to a +# primary segment. In this case, two primary segments will be created in the +# same directory. +declare -a DATA_DIRECTORY=(/data1/primary /data1/primary) + +# Base port for mirror segments - The starting port number for the mirror +# segments. Each mirror segment will use a unique port number starting from +# this base. 
+MIRROR_PORT_BASE=7000
+
+# Mirror segment data directories - An array specifying the directories where
+# the mirror segment data will be stored. Each directory corresponds to a
+# mirror segment. In this case, two mirror segments will be created in the
+# same directory.
+declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror)
+
+# Trusted shell - The shell program used for remote execution. Cloudberry uses
+# SSH to run commands on other machines in the cluster. 'ssh' is the default.
+TRUSTED_SHELL=ssh
+
+# Database encoding - The character set encoding to be used by the database.
+# 'UNICODE' is a common choice, especially for internationalization.
+ENCODING=UNICODE
+
+# Default database name - The name of the default database to be created during
+# initialization. This is also the default database that the gpadmin user will
+# connect to.
+DATABASE_NAME=gpadmin
+
+# Machine list file - A file containing the list of hostnames where the primary
+# segments will be created. Each line in the file represents a different machine.
+# This file is critical for setting up the cluster across multiple nodes.
+MACHINE_LIST_FILE=/home/gpadmin/hostfile_gpinitsystem
+
+# --------------------------------------------------------------------
+# End of gpinitsystem Configuration File
+# --------------------------------------------------------------------
diff --git a/devops/deploy/docker/build/rocky9/configs/init_system.sh b/devops/deploy/docker/build/rocky9/configs/init_system.sh
new file mode 100755
index 00000000000..d8c4a00b035
--- /dev/null
+++ b/devops/deploy/docker/build/rocky9/configs/init_system.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+# Container Initialization Script
+# --------------------------------------------------------------------
+# This script sets up the environment inside the Docker container for
+# the Apache Cloudberry Build Environment. It performs the following
+# tasks:
+#
+# 1. Verifies that the container is running with the expected hostname.
+# 2. Starts the SSH daemon to allow SSH access to the container.
+# 3. Configures passwordless SSH access for the 'gpadmin' user.
+# 4. Displays a welcome banner and system information.
+# 5. Starts an interactive bash shell.
+#
+# This script is intended to be used as an entrypoint or initialization
+# script for the Docker container.
+# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Check if the hostname is 'cdw' +# -------------------------------------------------------------------- +# The script checks if the container's hostname is set to 'cdw'. This is +# a requirement for this environment, and if the hostname does not match, +# the script will exit with an error message. This ensures consistency +# across different environments. +# -------------------------------------------------------------------- +if [ "$(hostname)" != "cdw" ]; then + echo "Error: This container must be run with the hostname 'cdw'." + echo "Use the following command: docker run -h cdw ..." + exit 1 +fi + +# -------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# -------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. If the SSH +# daemon fails to start, the script exits with an error. +# -------------------------------------------------------------------- +if ! sudo /usr/sbin/sshd; then + echo "Failed to start SSH daemon" >&2 + exit 1 +fi + +# -------------------------------------------------------------------- +# Remove /run/nologin to allow logins +# -------------------------------------------------------------------- +# The /run/nologin file, if present, prevents users from logging into +# the system. This file is removed to ensure that users can log in via SSH. +# -------------------------------------------------------------------- +sudo rm -rf /run/nologin + +# -------------------------------------------------------------------- +# Configure passwordless SSH access for 'gpadmin' user +# -------------------------------------------------------------------- +# The script sets up SSH key-based authentication for the 'gpadmin' user, +# allowing passwordless SSH access. It generates a new SSH key pair if one +# does not already exist, and configures the necessary permissions. +# -------------------------------------------------------------------- +mkdir -p /home/gpadmin/.ssh +chmod 700 /home/gpadmin/.ssh + +if [ ! -f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1 +fi + +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +chmod 600 /home/gpadmin/.ssh/authorized_keys + +# Add the container's hostname to the known_hosts file to avoid SSH warnings +ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null + +# Change to the home directory of the current user +cd $HOME + +# -------------------------------------------------------------------- +# Display a Welcome Banner +# -------------------------------------------------------------------- +# The following ASCII art and welcome message are displayed when the +# container starts. This banner provides a visual indication that the +# container is running in the Apache Cloudberry Build Environment. 
+# -------------------------------------------------------------------- +cat <<-'EOF' + +====================================================================== + + ++++++++++ ++++++ + ++++++++++++++ +++++++ + ++++ +++++ ++++ + ++++ +++++++++ + =+==== =============+ + ======== =====+ ===== + ==== ==== ==== ==== + ==== === === ==== + ==== === === ==== + ==== === ==-- === + ===== ===== -- ==== + ===================== ====== + ============================ + =-----= + ____ _ _ _ + / ___|| | ___ _ _ __| || |__ ___ _ __ _ __ _ _ + | | | | / _ \ | | | | / _` || '_ \ / _ \| '__|| '__|| | | | + | |___ | || (_) || |_| || (_| || |_) || __/| | | | | |_| | + \____||_| \____ \__,_| \__,_||_.__/ \___||_| |_| \__, | + |___/ +---------------------------------------------------------------------- + +EOF + +# -------------------------------------------------------------------- +# Display System Information +# -------------------------------------------------------------------- +# The script sources the /etc/os-release file to retrieve the operating +# system name and version. It then displays the following information: +# - OS name and version +# - Current user +# - Container hostname +# - IP address +# - CPU model name and number of cores +# - Total memory available +# This information is useful for users to understand the environment they +# are working in. +# -------------------------------------------------------------------- +source /etc/os-release + +# First, create the CPU info detection function +get_cpu_info() { + ARCH=$(uname -m) + if [ "$ARCH" = "x86_64" ]; then + lscpu | grep 'Model name:' | awk '{print substr($0, index($0,$3))}' + elif [ "$ARCH" = "aarch64" ]; then + VENDOR=$(lscpu | grep 'Vendor ID:' | awk '{print $3}') + if [ "$VENDOR" = "Apple" ] || [ "$VENDOR" = "0x61" ]; then + echo "Apple Silicon ($ARCH)" + else + if [ -f /proc/cpuinfo ]; then + IMPL=$(grep "CPU implementer" /proc/cpuinfo | head -1 | awk '{print $3}') + PART=$(grep "CPU part" /proc/cpuinfo | head -1 | awk '{print $3}') + if [ ! -z "$IMPL" ] && [ ! -z "$PART" ]; then + echo "ARM $ARCH (Implementer: $IMPL, Part: $PART)" + else + echo "ARM $ARCH" + fi + else + echo "ARM $ARCH" + fi + fi + else + echo "Unknown architecture: $ARCH" + fi +} + +cat <<-EOF +Welcome to the Apache Cloudberry Build Environment! + +Container OS ........ : $NAME $VERSION +User ................ : $(whoami) +Container hostname .. : $(hostname) +IP Address .......... : $(hostname -I | awk '{print $1}') +CPU Info ............ : $(get_cpu_info) +CPU(s) .............. : $(nproc) +Memory .............. : $(free -h | grep Mem: | awk '{print $2}') total +====================================================================== + +EOF + +# -------------------------------------------------------------------- +# Start an interactive bash shell +# -------------------------------------------------------------------- +# Finally, the script starts an interactive bash shell to keep the +# container running and allow the user to interact with the environment. 
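+# (Illustrative alternative: `exec /bin/bash` would replace the script
+# process with the shell, which can simplify signal handling when this
+# script is the container entrypoint; the plain /bin/bash below keeps
+# the script as the parent process instead.)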
+# -------------------------------------------------------------------- +/bin/bash diff --git a/devops/deploy/docker/build/rocky9/tests/requirements.txt b/devops/deploy/docker/build/rocky9/tests/requirements.txt new file mode 100644 index 00000000000..b9711eddac5 --- /dev/null +++ b/devops/deploy/docker/build/rocky9/tests/requirements.txt @@ -0,0 +1,3 @@ +testinfra +pytest-testinfra +paramiko diff --git a/devops/deploy/docker/build/rocky9/tests/testinfra/test_cloudberry_db_env.py b/devops/deploy/docker/build/rocky9/tests/testinfra/test_cloudberry_db_env.py new file mode 100644 index 00000000000..9da7929ff98 --- /dev/null +++ b/devops/deploy/docker/build/rocky9/tests/testinfra/test_cloudberry_db_env.py @@ -0,0 +1,129 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- + +import testinfra + +def test_installed_packages(host): + """ + Test if the essential packages are installed. + """ + packages = [ + "epel-release", + "git", + "the_silver_searcher", + "bat", + "htop", + "bison", + "cmake", + "gcc", + "gcc-c++", + "glibc-langpack-en", + "glibc-locale-source", + "openssh-clients", + "openssh-server", + "sudo", + "rsync", + "wget", + "openssl-devel", + "python3-devel", + "python3-pytest", + "readline-devel", + "zlib-devel", + "libcurl-devel", + "libevent-devel", + "libxml2-devel", + "libuuid-devel", + "libzstd-devel", + "lz4", + "openldap-devel", + "libuv-devel", + "libyaml-devel" + ] + for package in packages: + pkg = host.package(package) + assert pkg.is_installed + + +def test_user_gpadmin_exists(host): + """ + Test if the gpadmin user exists and is configured properly. + """ + user = host.user("gpadmin") + assert user.exists + assert "wheel" in user.groups + + +def test_ssh_service(host): + """ + Test if SSH service is configured correctly. + """ + sshd_config = host.file("/etc/ssh/sshd_config") + assert sshd_config.exists + + +def test_locale_configured(host): + """ + Test if the locale is configured correctly. + """ + locale_conf = host.file("/etc/locale.conf") + assert locale_conf.exists + assert locale_conf.contains("LANG=en_US.UTF-8") + + +def test_timezone(host): + """ + Test if the timezone is configured correctly. + """ + localtime = host.file("/etc/localtime") + assert localtime.exists + + +def test_system_limits_configured(host): + """ + Test if the custom system limits are applied. + """ + limits_file = host.file("/etc/security/limits.d/90-cbdb-limits") + assert limits_file.exists + + +def test_init_system_script(host): + """ + Test if the init_system.sh script is present and executable. 
+ """ + script = host.file("/tmp/init_system.sh") + assert script.exists + assert script.mode == 0o777 + + +def test_custom_configuration_files(host): + """ + Test if custom configuration files are correctly copied. + """ + config_file = host.file("/tmp/90-cbdb-limits") + assert config_file.exists + + +def test_locale_generated(host): + """ + Test if the en_US.UTF-8 locale is correctly generated. + """ + locale = host.run("locale -a | grep en_US.utf8") + assert locale.exit_status == 0 + assert "en_US.utf8" in locale.stdout diff --git a/devops/deploy/docker/test/rocky8/Dockerfile b/devops/deploy/docker/test/rocky8/Dockerfile new file mode 100644 index 00000000000..0d19026774d --- /dev/null +++ b/devops/deploy/docker/test/rocky8/Dockerfile @@ -0,0 +1,135 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# +# Apache Cloudberry (incubating) is an effort undergoing incubation at +# the Apache Software Foundation (ASF), sponsored by the Apache +# Incubator PMC. +# +# Incubation is required of all newly accepted projects until a +# further review indicates that the infrastructure, communications, +# and decision making process have stabilized in a manner consistent +# with other successful ASF projects. +# +# While incubation status is not necessarily a reflection of the +# completeness or stability of the code, it does indicate that the +# project has yet to be fully endorsed by the ASF. +# +# -------------------------------------------------------------------- +# Dockerfile for Apache Cloudberry Base Environment +# -------------------------------------------------------------------- +# This Dockerfile sets up a Rocky Linux 8-based container to serve as +# a base environment for evaluating Apache Cloudberry. It installs +# necessary system utilities, configures the environment for SSH access, +# and sets up a 'gpadmin' user with sudo privileges. The Apache +# Cloudberry RPM can be installed into this container for testing and +# functional verification. +# +# Key Features: +# - Locale setup for en_US.UTF-8 +# - SSH daemon setup for remote access +# - Essential system utilities installation +# - Separate user creation and configuration steps +# +# Security Considerations: +# - This Dockerfile prioritizes ease of use for functional testing and +# evaluation. It includes configurations such as passwordless sudo access +# for the 'gpadmin' user and SSH access with password authentication. +# - These configurations are suitable for testing and development but +# should NOT be used in a production environment due to potential security +# risks. +# +# Usage: +# docker build -t cloudberry-db-base-env . 
+# docker run -h cdw -it cloudberry-db-base-env +# -------------------------------------------------------------------- + +# Base image: Rocky Linux 8 +FROM rockylinux/rockylinux:8 + +# Argument for configuring the timezone +ARG TIMEZONE_VAR="America/Los_Angeles" + +# Environment variables for locale +ENV LANG=en_US.UTF-8 + +# -------------------------------------------------------------------- +# System Update and Installation +# -------------------------------------------------------------------- +# Update the system and install essential system utilities required for +# running and testing Apache Cloudberry. Cleanup the DNF cache afterward +# to reduce the image size. +# -------------------------------------------------------------------- +RUN dnf install -y \ + file \ + gdb \ + glibc-locale-source \ + make \ + openssh \ + openssh-clients \ + openssh-server \ + procps-ng \ + sudo \ + which \ + && \ + dnf clean all # Clean up DNF cache after package installations + +# -------------------------------------------------------------------- +# User Creation and Configuration +# -------------------------------------------------------------------- +# - Create the 'gpadmin' user and group. +# - Configure the 'gpadmin' user with passwordless sudo privileges. +# - Add Cloudberry-specific entries to the gpadmin's .bashrc. +# -------------------------------------------------------------------- +RUN /usr/sbin/groupadd gpadmin && \ + /usr/sbin/useradd gpadmin -g gpadmin -G wheel && \ + echo 'gpadmin ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/90-gpadmin && \ + echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cloudberry/cloudberry-env.sh ]; then\n source /usr/local/cloudberry/cloudberry-env.sh\n export COORDINATOR_DATA_DIRECTORY=/data1/coordinator/gpseg-1\nfi' >> /home/gpadmin/.bashrc + +# -------------------------------------------------------------------- +# Copy Configuration Files and Setup the Environment +# -------------------------------------------------------------------- +# - Copy custom configuration files from the build context to /tmp/. +# - Apply custom system limits and timezone. +# - Set up SSH for password-based authentication. +# - Generate locale and set the default locale to en_US.UTF-8. +# -------------------------------------------------------------------- +COPY ./configs/* /tmp/ + +RUN cp /tmp/90-cbdb-limits /etc/security/limits.d/90-cbdb-limits && \ + sed -i.bak -r 's/^(session\s+required\s+pam_limits.so)/#\1/' /etc/pam.d/* && \ + cat /usr/share/zoneinfo/${TIMEZONE_VAR} > /etc/localtime && \ + chmod 777 /tmp/init_system.sh && \ + setcap cap_net_raw+ep /usr/bin/ping && \ + ssh-keygen -A && \ + echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ + localedef -i en_US -f UTF-8 en_US.UTF-8 && \ + echo "LANG=en_US.UTF-8" | tee /etc/locale.conf + +# -------------------------------------------------------------------- +# Set the Default User and Command +# -------------------------------------------------------------------- +# The default user is set to 'gpadmin', and the container starts by +# running the init_system.sh script. This container serves as a base +# environment, and the Apache Cloudberry RPM can be installed for +# testing and functional verification. 
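+#
+# Example (illustrative; the container name is an assumption): a quick
+# smoke test of the built image could be:
+#
+#   docker build -t cloudberry-db-base-env .
+#   docker run -h cdw -dit --name cbdb-eval cloudberry-db-base-env
+#   docker exec cbdb-eval pgrep -x sshd   # sshd should be running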
+# --------------------------------------------------------------------
+USER gpadmin
+
+CMD ["bash","-c","/tmp/init_system.sh"]
diff --git a/devops/deploy/docker/test/rocky8/configs/90-cbdb-limits b/devops/deploy/docker/test/rocky8/configs/90-cbdb-limits
new file mode 100644
index 00000000000..474957c42f6
--- /dev/null
+++ b/devops/deploy/docker/test/rocky8/configs/90-cbdb-limits
@@ -0,0 +1,32 @@
+# /etc/security/limits.d/90-cbdb-limits
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+
+# Core dump file size limits for gpadmin
+gpadmin soft core unlimited
+gpadmin hard core unlimited
+
+# Open file limits for gpadmin
+gpadmin soft nofile 524288
+gpadmin hard nofile 524288
+
+# Process limits for gpadmin
+gpadmin soft nproc 131072
+gpadmin hard nproc 131072
diff --git a/devops/deploy/docker/test/rocky8/configs/gpinitsystem.conf b/devops/deploy/docker/test/rocky8/configs/gpinitsystem.conf
new file mode 100644
index 00000000000..4a5f82b668d
--- /dev/null
+++ b/devops/deploy/docker/test/rocky8/configs/gpinitsystem.conf
@@ -0,0 +1,87 @@
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+# gpinitsystem Configuration File for Cloudberry Database
+# --------------------------------------------------------------------
+# This configuration file is used to initialize a Cloudberry Database
+# cluster. It defines the settings for the coordinator, primary segments,
+# and mirrors, as well as other important configuration options.
+# --------------------------------------------------------------------
+
+# Segment prefix - This prefix is used for naming the segment directories.
+# For example, the primary segment directories will be named gpseg0, gpseg1, etc.
+SEG_PREFIX=gpseg
+
+# Coordinator port - The port number where the coordinator will listen.
+# This is the port used by clients to connect to the database. +COORDINATOR_PORT=5432 + +# Coordinator hostname - The hostname of the machine where the coordinator +# will be running. The $(hostname) command will automatically insert the +# hostname of the current machine. +COORDINATOR_HOSTNAME=$(hostname) + +# Coordinator data directory - The directory where the coordinator's data +# will be stored. This directory should have enough space to store metadata +# and system catalogs. +COORDINATOR_DIRECTORY=/data1/coordinator + +# Base port for primary segments - The starting port number for the primary +# segments. Each primary segment will use a unique port number starting from +# this base. +PORT_BASE=6000 + +# Primary segment data directories - An array specifying the directories where +# the primary segment data will be stored. Each directory corresponds to a +# primary segment. In this case, two primary segments will be created in the +# same directory. +declare -a DATA_DIRECTORY=(/data1/primary /data1/primary) + +# Base port for mirror segments - The starting port number for the mirror +# segments. Each mirror segment will use a unique port number starting from +# this base. +MIRROR_PORT_BASE=7000 + +# Mirror segment data directories - An array specifying the directories where +# the mirror segment data will be stored. Each directory corresponds to a +# mirror segment. In this case, two mirror segments will be created in the +# same directory. +declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror) + +# Trusted shell - The shell program used for remote execution. Cloudberry uses +# SSH to run commands on other machines in the cluster. 'ssh' is the default. +TRUSTED_SHELL=ssh + +# Database encoding - The character set encoding to be used by the database. +# 'UNICODE' is a common choice, especially for internationalization. +ENCODING=UNICODE + +# Default database name - The name of the default database to be created during +# initialization. This is also the default database that the gpadmin user will +# connect to. +DATABASE_NAME=gpadmin + +# Machine list file - A file containing the list of hostnames where the primary +# segments will be created. Each line in the file represents a different machine. +# This file is critical for setting up the cluster across multiple nodes. +MACHINE_LIST_FILE=/home/gpadmin/hostfile_gpinitsystem + +# -------------------------------------------------------------------- +# End of gpinitsystem Configuration File +# -------------------------------------------------------------------- diff --git a/devops/deploy/docker/test/rocky8/configs/init_system.sh b/devops/deploy/docker/test/rocky8/configs/init_system.sh new file mode 100755 index 00000000000..3ea7e34b0ff --- /dev/null +++ b/devops/deploy/docker/test/rocky8/configs/init_system.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# Container Initialization Script +# -------------------------------------------------------------------- +# This script sets up the environment inside the Docker container for +# the Apache Cloudberry Build Environment. It performs the following +# tasks: +# +# 1. Verifies that the container is running with the expected hostname. +# 2. Starts the SSH daemon to allow SSH access to the container. +# 3. Configures passwordless SSH access for the 'gpadmin' user. +# 4. Sets up the necessary directories and configuration files for +# Apache Cloudberry. +# 5. Displays a welcome banner and system information. +# 6. Starts an interactive bash shell. +# +# This script is intended to be used as an entrypoint or initialization +# script for the Docker container. +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Check if the hostname is 'cdw' +# -------------------------------------------------------------------- +# The script checks if the container's hostname is set to 'cdw'. This is +# a requirement for this environment, and if the hostname does not match, +# the script will exit with an error message. This ensures consistency +# across different environments. +# -------------------------------------------------------------------- +if [ "$(hostname)" != "cdw" ]; then + echo "Error: This container must be run with the hostname 'cdw'." + echo "Use the following command: docker run -h cdw ..." + exit 1 +fi + +# -------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# -------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. If the SSH +# daemon fails to start, the script exits with an error. +# -------------------------------------------------------------------- +if ! sudo /usr/sbin/sshd; then + echo "Failed to start SSH daemon" >&2 + exit 1 +fi + +# -------------------------------------------------------------------- +# Remove /run/nologin to allow logins +# -------------------------------------------------------------------- +# The /run/nologin file, if present, prevents users from logging into +# the system. This file is removed to ensure that users can log in via SSH. +# -------------------------------------------------------------------- +sudo rm -rf /run/nologin + +# -------------------------------------------------------------------- +# Configure passwordless SSH access for 'gpadmin' user +# -------------------------------------------------------------------- +# The script sets up SSH key-based authentication for the 'gpadmin' user, +# allowing passwordless SSH access. It generates a new SSH key pair if one +# does not already exist, and configures the necessary permissions. 
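+#
+# (Illustrative check once setup completes: `ssh -o BatchMode=yes cdw true`
+# should succeed without prompting if the key-based setup below worked.)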
+# -------------------------------------------------------------------- +mkdir -p /home/gpadmin/.ssh +chmod 700 /home/gpadmin/.ssh + +if [ ! -f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1 +fi + +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +chmod 600 /home/gpadmin/.ssh/authorized_keys + +# Add the container's hostname to the known_hosts file to avoid SSH warnings +ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null + +# -------------------------------------------------------------------- +# Cloudberry Data Directories Setup +# -------------------------------------------------------------------- +# The script sets up the necessary directories for Apache Cloudberry, +# including directories for the coordinator, standby coordinator, primary +# segments, and mirror segments. It also sets up the configuration files +# required for initializing the database. +# -------------------------------------------------------------------- +sudo rm -rf /data1/* +sudo mkdir -p /data1/coordinator /data1/standby_coordinator /data1/primary /data1/mirror +sudo chown -R gpadmin.gpadmin /data1 + +# Copy the gpinitsystem configuration file to the home directory +cp /tmp/gpinitsystem.conf /home/gpadmin + +# Set up the hostfile for cluster initialization +echo $(hostname) > /home/gpadmin/hostfile_gpinitsystem + +# Change to the home directory of the current user +cd $HOME + +# -------------------------------------------------------------------- +# Display a Welcome Banner +# -------------------------------------------------------------------- +# The following ASCII art and welcome message are displayed when the +# container starts. This banner provides a visual indication that the +# container is running in the Apache Cloudberry Build Environment. +# -------------------------------------------------------------------- +cat <<-'EOF' + +====================================================================== + + ++++++++++ ++++++ + ++++++++++++++ +++++++ + ++++ +++++ ++++ + ++++ +++++++++ + =+==== =============+ + ======== =====+ ===== + ==== ==== ==== ==== + ==== === === ==== + ==== === === ==== + ==== === ==-- === + ===== ===== -- ==== + ===================== ====== + ============================ + =-----= + ____ _ _ _ + / ___|| | ___ _ _ __| || |__ ___ _ __ _ __ _ _ + | | | | / _ \ | | | | / _` || '_ \ / _ \| '__|| '__|| | | | + | |___ | || (_) || |_| || (_| || |_) || __/| | | | | |_| | + \____||_| \____ \__,_| \__,_||_.__/ \___||_| |_| \__, | + |___/ +---------------------------------------------------------------------- + +EOF + +# -------------------------------------------------------------------- +# Display System Information +# -------------------------------------------------------------------- +# The script sources the /etc/os-release file to retrieve the operating +# system name and version. It then displays the following information: +# - OS name and version +# - Current user +# - Container hostname +# - IP address +# - CPU model name and number of cores +# - Total memory available +# - Cloudberry version (if installed) +# This information is useful for users to understand the environment they +# are working in. 
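+# (Note: `hostname -I` can return multiple addresses on multi-homed hosts;
+# the banner below reports only the first one via awk.)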
+# -------------------------------------------------------------------- +source /etc/os-release + +# First, create the CPU info detection function +get_cpu_info() { + ARCH=$(uname -m) + if [ "$ARCH" = "x86_64" ]; then + lscpu | grep 'Model name:' | awk '{print substr($0, index($0,$3))}' + elif [ "$ARCH" = "aarch64" ]; then + VENDOR=$(lscpu | grep 'Vendor ID:' | awk '{print $3}') + if [ "$VENDOR" = "Apple" ] || [ "$VENDOR" = "0x61" ]; then + echo "Apple Silicon ($ARCH)" + else + if [ -f /proc/cpuinfo ]; then + IMPL=$(grep "CPU implementer" /proc/cpuinfo | head -1 | awk '{print $3}') + PART=$(grep "CPU part" /proc/cpuinfo | head -1 | awk '{print $3}') + if [ ! -z "$IMPL" ] && [ ! -z "$PART" ]; then + echo "ARM $ARCH (Implementer: $IMPL, Part: $PART)" + else + echo "ARM $ARCH" + fi + else + echo "ARM $ARCH" + fi + fi + else + echo "Unknown architecture: $ARCH" + fi +} + +# Check if Apache Cloudberry is installed and display its version +if rpm -q apache-cloudberry-db-incubating > /dev/null 2>&1; then + CBDB_VERSION=$(/usr/local/cbdb/bin/postgres --gp-version) +else + CBDB_VERSION="Not installed" +fi + +cat <<-EOF +Welcome to the Apache Cloudberry Test Environment! + +Cloudberry version .. : $CBDB_VERSION +Container OS ........ : $NAME $VERSION +User ................ : $(whoami) +Container hostname .. : $(hostname) +IP Address .......... : $(hostname -I | awk '{print $1}') +CPU Info ............ : $(get_cpu_info) +CPU(s) .............. : $(nproc) +Memory .............. : $(free -h | grep Mem: | awk '{print $2}') total +====================================================================== + +EOF + +# -------------------------------------------------------------------- +# Start an interactive bash shell +# -------------------------------------------------------------------- +# Finally, the script starts an interactive bash shell to keep the +# container running and allow the user to interact with the environment. +# -------------------------------------------------------------------- +/bin/bash diff --git a/devops/deploy/docker/test/rocky9/Dockerfile b/devops/deploy/docker/test/rocky9/Dockerfile new file mode 100644 index 00000000000..245cf91d6a6 --- /dev/null +++ b/devops/deploy/docker/test/rocky9/Dockerfile @@ -0,0 +1,135 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# +# Apache Cloudberry (incubating) is an effort undergoing incubation at +# the Apache Software Foundation (ASF), sponsored by the Apache +# Incubator PMC. 
+# +# Incubation is required of all newly accepted projects until a +# further review indicates that the infrastructure, communications, +# and decision making process have stabilized in a manner consistent +# with other successful ASF projects. +# +# While incubation status is not necessarily a reflection of the +# completeness or stability of the code, it does indicate that the +# project has yet to be fully endorsed by the ASF. +# +# -------------------------------------------------------------------- +# Dockerfile for Cloudberry Database Base Environment +# -------------------------------------------------------------------- +# This Dockerfile sets up a Rocky Linux 9-based container to serve as +# a base environment for evaluating the Cloudberry Database. It installs +# necessary system utilities, configures the environment for SSH access, +# and sets up a 'gpadmin' user with sudo privileges. The Cloudberry +# Database RPM can be installed into this container for testing and +# functional verification. +# +# Key Features: +# - Locale setup for en_US.UTF-8 +# - SSH daemon setup for remote access +# - Essential system utilities installation +# - Separate user creation and configuration steps +# +# Security Considerations: +# - This Dockerfile prioritizes ease of use for functional testing and +# evaluation. It includes configurations such as passwordless sudo access +# for the 'gpadmin' user and SSH access with password authentication. +# - These configurations are suitable for testing and development but +# should NOT be used in a production environment due to potential security +# risks. +# +# Usage: +# docker build -t cloudberry-db-base-env . +# docker run -h cdw -it cloudberry-db-base-env +# -------------------------------------------------------------------- + +# Base image: Rocky Linux 9 +FROM rockylinux/rockylinux:9 + +# Argument for configuring the timezone +ARG TIMEZONE_VAR="America/Los_Angeles" + +# Environment variables for locale +ENV LANG=en_US.UTF-8 + +# -------------------------------------------------------------------- +# System Update and Installation +# -------------------------------------------------------------------- +# Update the system and install essential system utilities required for +# running and testing Cloudberry Database. Cleanup the DNF cache afterward +# to reduce the image size. +# -------------------------------------------------------------------- +RUN dnf install -y \ + file \ + gdb \ + glibc-locale-source \ + make \ + openssh \ + openssh-clients \ + openssh-server \ + procps-ng \ + sudo \ + which \ + && \ + dnf clean all # Clean up DNF cache after package installations + +# -------------------------------------------------------------------- +# User Creation and Configuration +# -------------------------------------------------------------------- +# - Create the 'gpadmin' user and group. +# - Configure the 'gpadmin' user with passwordless sudo privileges. +# - Add Cloudberry-specific entries to the gpadmin's .bashrc. 
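+#
+# (Illustrative: the sudoers fragment created below can be validated with
+# `visudo -cf /etc/sudoers.d/90-gpadmin` when debugging sudo access
+# inside the container.)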
+# --------------------------------------------------------------------
+RUN /usr/sbin/groupadd gpadmin && \
+    /usr/sbin/useradd gpadmin -g gpadmin -G wheel && \
+    echo 'gpadmin ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/90-gpadmin && \
+    echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cloudberry/cloudberry-env.sh ]; then\n source /usr/local/cloudberry/cloudberry-env.sh\n export COORDINATOR_DATA_DIRECTORY=/data1/coordinator/gpseg-1\nfi' >> /home/gpadmin/.bashrc
+
+# --------------------------------------------------------------------
+# Copy Configuration Files and Setup the Environment
+# --------------------------------------------------------------------
+# - Copy custom configuration files from the build context to /tmp/.
+# - Apply custom system limits and timezone.
+# - Set up SSH for password-based authentication.
+# - Generate locale and set the default locale to en_US.UTF-8.
+# --------------------------------------------------------------------
+COPY ./configs/* /tmp/
+
+RUN cp /tmp/90-cbdb-limits /etc/security/limits.d/90-cbdb-limits && \
+    sed -i.bak -r 's/^(session\s+required\s+pam_limits.so)/#\1/' /etc/pam.d/* && \
+    cat /usr/share/zoneinfo/${TIMEZONE_VAR} > /etc/localtime && \
+    chmod 777 /tmp/init_system.sh && \
+    setcap cap_net_raw+ep /usr/bin/ping && \
+    ssh-keygen -A && \
+    echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \
+    localedef -i en_US -f UTF-8 en_US.UTF-8 && \
+    echo "LANG=en_US.UTF-8" | tee /etc/locale.conf
+
+# --------------------------------------------------------------------
+# Set the Default User and Command
+# --------------------------------------------------------------------
+# The default user is set to 'gpadmin', and the container starts by
+# running the init_system.sh script. This container serves as a base
+# environment, and the Cloudberry Database RPM can be installed for
+# testing and functional verification.
+# --------------------------------------------------------------------
+USER gpadmin
+
+CMD ["bash","-c","/tmp/init_system.sh"]
diff --git a/devops/deploy/docker/test/rocky9/configs/90-cbdb-limits b/devops/deploy/docker/test/rocky9/configs/90-cbdb-limits
new file mode 100644
index 00000000000..474957c42f6
--- /dev/null
+++ b/devops/deploy/docker/test/rocky9/configs/90-cbdb-limits
@@ -0,0 +1,32 @@
+# /etc/security/limits.d/90-cbdb-limits
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+# +# -------------------------------------------------------------------- + +# Core dump file size limits for gpadmin +gpadmin soft core unlimited +gpadmin hard core unlimited + +# Open file limits for gpadmin +gpadmin soft nofile 524288 +gpadmin hard nofile 524288 + +# Process limits for gpadmin +gpadmin soft nproc 131072 +gpadmin hard nproc 131072 diff --git a/devops/deploy/docker/test/rocky9/configs/gpinitsystem.conf b/devops/deploy/docker/test/rocky9/configs/gpinitsystem.conf new file mode 100644 index 00000000000..896c8c79e54 --- /dev/null +++ b/devops/deploy/docker/test/rocky9/configs/gpinitsystem.conf @@ -0,0 +1,87 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# ---------------------------------------------------------------------- +# gpinitsystem Configuration File for Cloudberry Database +# ---------------------------------------------------------------------- +# This configuration file is used to initialize a Cloudberry Database +# cluster. It defines the settings for the coordinator, primary segments, +# and mirrors, as well as other important configuration options. +# ---------------------------------------------------------------------- + +# Segment prefix - This prefix is used for naming the segment directories. +# For example, the primary segment directories will be named gpseg0, gpseg1, etc. +SEG_PREFIX=gpseg + +# Coordinator port - The port number where the coordinator will listen. +# This is the port used by clients to connect to the database. +COORDINATOR_PORT=5432 + +# Coordinator hostname - The hostname of the machine where the coordinator +# will be running. The $(hostname) command will automatically insert the +# hostname of the current machine. +COORDINATOR_HOSTNAME=$(hostname) + +# Coordinator data directory - The directory where the coordinator's data +# will be stored. This directory should have enough space to store metadata +# and system catalogs. +COORDINATOR_DIRECTORY=/data1/coordinator + +# Base port for primary segments - The starting port number for the primary +# segments. Each primary segment will use a unique port number starting from +# this base. +PORT_BASE=6000 + +# Primary segment data directories - An array specifying the directories where +# the primary segment data will be stored. Each directory corresponds to a +# primary segment. In this case, two primary segments will be created in the +# same directory. +declare -a DATA_DIRECTORY=(/data1/primary /data1/primary) + +# Base port for mirror segments - The starting port number for the mirror +# segments. Each mirror segment will use a unique port number starting from +# this base. 
+MIRROR_PORT_BASE=7000 + +# Mirror segment data directories - An array specifying the directories where +# the mirror segment data will be stored. Each directory corresponds to a +# mirror segment. In this case, two mirror segments will be created in the +# same directory. +declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror) + +# Trusted shell - The shell program used for remote execution. Cloudberry uses +# SSH to run commands on other machines in the cluster. 'ssh' is the default. +TRUSTED_SHELL=ssh + +# Database encoding - The character set encoding to be used by the database. +# 'UNICODE' is a common choice, especially for internationalization. +ENCODING=UNICODE + +# Default database name - The name of the default database to be created during +# initialization. This is also the default database that the gpadmin user will +# connect to. +DATABASE_NAME=gpadmin + +# Machine list file - A file containing the list of hostnames where the primary +# segments will be created. Each line in the file represents a different machine. +# This file is critical for setting up the cluster across multiple nodes. +MACHINE_LIST_FILE=/home/gpadmin/hostfile_gpinitsystem + +# ---------------------------------------------------------------------- +# End of gpinitsystem Configuration File +# ---------------------------------------------------------------------- diff --git a/devops/deploy/docker/test/rocky9/configs/init_system.sh b/devops/deploy/docker/test/rocky9/configs/init_system.sh new file mode 100755 index 00000000000..3ea7e34b0ff --- /dev/null +++ b/devops/deploy/docker/test/rocky9/configs/init_system.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# Container Initialization Script +# -------------------------------------------------------------------- +# This script sets up the environment inside the Docker container for +# the Apache Cloudberry Build Environment. It performs the following +# tasks: +# +# 1. Verifies that the container is running with the expected hostname. +# 2. Starts the SSH daemon to allow SSH access to the container. +# 3. Configures passwordless SSH access for the 'gpadmin' user. +# 4. Sets up the necessary directories and configuration files for +# Apache Cloudberry. +# 5. Displays a welcome banner and system information. +# 6. Starts an interactive bash shell. +# +# This script is intended to be used as an entrypoint or initialization +# script for the Docker container. 
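+#
+# Example (illustrative; the RPM filename and location are assumptions):
+# once the container is up, a Cloudberry build can be installed for
+# testing with:
+#
+#   sudo dnf install -y /tmp/apache-cloudberry-db-incubating-*.rpm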
+# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Check if the hostname is 'cdw' +# -------------------------------------------------------------------- +# The script checks if the container's hostname is set to 'cdw'. This is +# a requirement for this environment, and if the hostname does not match, +# the script will exit with an error message. This ensures consistency +# across different environments. +# -------------------------------------------------------------------- +if [ "$(hostname)" != "cdw" ]; then + echo "Error: This container must be run with the hostname 'cdw'." + echo "Use the following command: docker run -h cdw ..." + exit 1 +fi + +# -------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# -------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. If the SSH +# daemon fails to start, the script exits with an error. +# -------------------------------------------------------------------- +if ! sudo /usr/sbin/sshd; then + echo "Failed to start SSH daemon" >&2 + exit 1 +fi + +# -------------------------------------------------------------------- +# Remove /run/nologin to allow logins +# -------------------------------------------------------------------- +# The /run/nologin file, if present, prevents users from logging into +# the system. This file is removed to ensure that users can log in via SSH. +# -------------------------------------------------------------------- +sudo rm -rf /run/nologin + +# -------------------------------------------------------------------- +# Configure passwordless SSH access for 'gpadmin' user +# -------------------------------------------------------------------- +# The script sets up SSH key-based authentication for the 'gpadmin' user, +# allowing passwordless SSH access. It generates a new SSH key pair if one +# does not already exist, and configures the necessary permissions. +# -------------------------------------------------------------------- +mkdir -p /home/gpadmin/.ssh +chmod 700 /home/gpadmin/.ssh + +if [ ! -f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1 +fi + +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +chmod 600 /home/gpadmin/.ssh/authorized_keys + +# Add the container's hostname to the known_hosts file to avoid SSH warnings +ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null + +# -------------------------------------------------------------------- +# Cloudberry Data Directories Setup +# -------------------------------------------------------------------- +# The script sets up the necessary directories for Apache Cloudberry, +# including directories for the coordinator, standby coordinator, primary +# segments, and mirror segments. It also sets up the configuration files +# required for initializing the database. 
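+#
+# With these directories and files in place, a single-node demo cluster
+# can typically be initialized as gpadmin (illustrative; assumes the
+# Cloudberry RPM is installed and its environment sourced):
+#
+#   gpinitsystem -a -c /home/gpadmin/gpinitsystem.conf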
+# -------------------------------------------------------------------- +sudo rm -rf /data1/* +sudo mkdir -p /data1/coordinator /data1/standby_coordinator /data1/primary /data1/mirror +sudo chown -R gpadmin.gpadmin /data1 + +# Copy the gpinitsystem configuration file to the home directory +cp /tmp/gpinitsystem.conf /home/gpadmin + +# Set up the hostfile for cluster initialization +echo $(hostname) > /home/gpadmin/hostfile_gpinitsystem + +# Change to the home directory of the current user +cd $HOME + +# -------------------------------------------------------------------- +# Display a Welcome Banner +# -------------------------------------------------------------------- +# The following ASCII art and welcome message are displayed when the +# container starts. This banner provides a visual indication that the +# container is running in the Apache Cloudberry Build Environment. +# -------------------------------------------------------------------- +cat <<-'EOF' + +====================================================================== + + ++++++++++ ++++++ + ++++++++++++++ +++++++ + ++++ +++++ ++++ + ++++ +++++++++ + =+==== =============+ + ======== =====+ ===== + ==== ==== ==== ==== + ==== === === ==== + ==== === === ==== + ==== === ==-- === + ===== ===== -- ==== + ===================== ====== + ============================ + =-----= + ____ _ _ _ + / ___|| | ___ _ _ __| || |__ ___ _ __ _ __ _ _ + | | | | / _ \ | | | | / _` || '_ \ / _ \| '__|| '__|| | | | + | |___ | || (_) || |_| || (_| || |_) || __/| | | | | |_| | + \____||_| \____ \__,_| \__,_||_.__/ \___||_| |_| \__, | + |___/ +---------------------------------------------------------------------- + +EOF + +# -------------------------------------------------------------------- +# Display System Information +# -------------------------------------------------------------------- +# The script sources the /etc/os-release file to retrieve the operating +# system name and version. It then displays the following information: +# - OS name and version +# - Current user +# - Container hostname +# - IP address +# - CPU model name and number of cores +# - Total memory available +# - Cloudberry version (if installed) +# This information is useful for users to understand the environment they +# are working in. +# -------------------------------------------------------------------- +source /etc/os-release + +# First, create the CPU info detection function +get_cpu_info() { + ARCH=$(uname -m) + if [ "$ARCH" = "x86_64" ]; then + lscpu | grep 'Model name:' | awk '{print substr($0, index($0,$3))}' + elif [ "$ARCH" = "aarch64" ]; then + VENDOR=$(lscpu | grep 'Vendor ID:' | awk '{print $3}') + if [ "$VENDOR" = "Apple" ] || [ "$VENDOR" = "0x61" ]; then + echo "Apple Silicon ($ARCH)" + else + if [ -f /proc/cpuinfo ]; then + IMPL=$(grep "CPU implementer" /proc/cpuinfo | head -1 | awk '{print $3}') + PART=$(grep "CPU part" /proc/cpuinfo | head -1 | awk '{print $3}') + if [ ! -z "$IMPL" ] && [ ! -z "$PART" ]; then + echo "ARM $ARCH (Implementer: $IMPL, Part: $PART)" + else + echo "ARM $ARCH" + fi + else + echo "ARM $ARCH" + fi + fi + else + echo "Unknown architecture: $ARCH" + fi +} + +# Check if Apache Cloudberry is installed and display its version +if rpm -q apache-cloudberry-db-incubating > /dev/null 2>&1; then + CBDB_VERSION=$(/usr/local/cbdb/bin/postgres --gp-version) +else + CBDB_VERSION="Not installed" +fi + +cat <<-EOF +Welcome to the Apache Cloudberry Test Environment! + +Cloudberry version .. : $CBDB_VERSION +Container OS ........ 
: $NAME $VERSION +User ................ : $(whoami) +Container hostname .. : $(hostname) +IP Address .......... : $(hostname -I | awk '{print $1}') +CPU Info ............ : $(get_cpu_info) +CPU(s) .............. : $(nproc) +Memory .............. : $(free -h | grep Mem: | awk '{print $2}') total +====================================================================== + +EOF + +# -------------------------------------------------------------------- +# Start an interactive bash shell +# -------------------------------------------------------------------- +# Finally, the script starts an interactive bash shell to keep the +# container running and allow the user to interact with the environment. +# -------------------------------------------------------------------- +/bin/bash diff --git a/devops/release/cloudberry-release.sh b/devops/release/cloudberry-release.sh new file mode 100755 index 00000000000..5fd579b481e --- /dev/null +++ b/devops/release/cloudberry-release.sh @@ -0,0 +1,496 @@ +#!/usr/bin/env bash +# ====================================================================== +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ====================================================================== +# +# cloudberry-release.sh — Apache Cloudberry (Incubating) release utility +# +# This script automates the preparation of an Apache Cloudberry release +# candidate, including version validation, tag creation, and source +# tarball assembly. 
+# +# Supported Features: +# - Validates version consistency across configure.ac, configure, gpversion.py, and pom.xml +# - Supports both final releases and release candidates (e.g., 2.0.0-incubating, 2.0.0-incubating-rc1) +# - Optionally reuses existing annotated Git tags if they match the current HEAD +# - Verifies that Git submodules are initialized (if defined in .gitmodules) +# - Verifies Git identity (user.name and user.email) prior to tagging +# - Creates a BUILD_NUMBER file (currently hardcoded as 1) in the release tarball +# - Recursively archives all submodules into the source tarball +# - Generates SHA-512 checksum (.sha512) for the source tarball +# - Generates GPG signature (.asc) for the source tarball, unless --skip-signing is used +# - Moves signed artifacts into a dedicated artifacts/ directory +# - Verifies integrity and authenticity of artifacts via SHA-512 checksum and GPG signature +# - Allows skipping of upstream remote URL validation (e.g., for forks) via --skip-remote-check +# +# Usage: +# ./cloudberry-release.sh --stage --tag 2.0.0-incubating-rc1 --gpg-user your@apache.org +# +# Options: +# -s, --stage Stage a release candidate and generate source tarball +# -t, --tag Tag to apply or validate (e.g., 2.0.0-incubating-rc1) +# -f, --force-tag-reuse Allow reuse of an existing tag (must match HEAD) +# -r, --repo Optional path to local Cloudberry Git repository +# -S, --skip-remote-check Skip validation of remote.origin.url (useful for forks/mirrors) +# -g, --gpg-user GPG key ID or email to use for signing (required) +# -k, --skip-signing Skip GPG key validation and signature generation +# -h, --help Show usage and exit +# +# Requirements: +# - Must be run from the root of a valid Apache Cloudberry Git clone, +# or the path must be explicitly provided using --repo +# - Git user.name and user.email must be configured +# - Repository remote must be: git@github.com:apache/cloudberry.git +# +# Examples: +# ./cloudberry-release.sh -s -t 2.0.0-incubating-rc1 --gpg-user your@apache.org +# ./cloudberry-release.sh -s -t 2.0.0-incubating-rc1 --skip-signing +# ./cloudberry-release.sh --stage --tag 2.0.0-incubating-rc2 --force-tag-reuse --gpg-user your@apache.org +# ./cloudberry-release.sh --stage --tag 2.0.0-incubating-rc1 -r ~/cloudberry --skip-remote-check --gpg-user your@apache.org +# +# Notes: +# - When reusing a tag, the `--force-tag-reuse` flag must be provided. +# - This script creates a BUILD_NUMBER file in the source root for traceability. It is included in the tarball. 
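+#
+# Manual verification of staged artifacts (illustrative; exact tarball
+# names depend on the tag being staged):
+#   sha512sum -c artifacts/*.tar.gz.sha512
+#   gpg --verify artifacts/*.tar.gz.asc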
+# ======================================================================
+
+set -euo pipefail
+
+confirm() {
+    read -r -p "$1 [y/N] " response
+    case "$response" in
+        [yY][eE][sS]|[yY]) true ;;
+        *) echo "Aborted."; exit 1 ;;
+    esac
+}
+
+section() {
+    echo
+    echo "================================================================="
+    echo ">> $1"
+    echo "================================================================="
+}
+
+show_help() {
+    echo "Apache Cloudberry (Incubating) Release Tool"
+    echo
+    echo "Usage:"
+    echo "  $0 --stage --tag <tag>"
+    echo
+    echo "Options:"
+    echo "  -s, --stage"
+    echo "      Stage a release candidate and generate source tarball"
+    echo
+    echo "  -t, --tag <tag>"
+    echo "      Required with --stage (e.g., 2.0.0-incubating-rc1)"
+    echo
+    echo "  -f, --force-tag-reuse"
+    echo "      Reuse existing tag if it matches current HEAD"
+    echo
+    echo "  -r, --repo <path>"
+    echo "      Optional path to a local Cloudberry Git repository clone"
+    echo
+    echo "  -S, --skip-remote-check"
+    echo "      Skip remote.origin.url check (use for forks or mirrors)"
+    echo "      Required for official releases:"
+    echo "      git@github.com:apache/cloudberry.git"
+    echo
+    echo "  -g, --gpg-user <key-id|email>"
+    echo "      GPG key ID or email to use for signing (required unless --skip-signing)"
+    echo
+    echo "  -k, --skip-signing"
+    echo "      Skip GPG key validation and signature generation"
+    echo
+    echo "  -h, --help"
+    echo "      Show this help message"
+    exit 1
+}
+
+# Flags
+STAGE=false
+SKIP_SIGNING=false
+TAG=""
+FORCE_TAG_REUSE=false
+REPO_ARG=""
+SKIP_REMOTE_CHECK=false
+GPG_USER=""
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -g|--gpg-user)
+            if [[ $# -lt 2 ]]; then
+                echo "ERROR: --gpg-user requires a key ID or email." >&2
+                show_help
+            fi
+            GPG_USER="$2"
+            shift 2
+            ;;
+        -s|--stage)
+            STAGE=true
+            shift
+            ;;
+        -t|--tag)
+            if [[ $# -lt 2 ]]; then
+                echo "ERROR: Missing tag value after --tag" >&2
+                show_help
+            fi
+            TAG="$2"
+            shift 2
+            ;;
+        -f|--force-tag-reuse)
+            FORCE_TAG_REUSE=true
+            shift
+            ;;
+        -r|--repo)
+            if [[ $# -lt 2 ]]; then
+                echo "ERROR: --repo requires a path." >&2
+                show_help
+            fi
+            REPO_ARG="$2"
+            shift 2
+            ;;
+        -S|--skip-remote-check)
+            SKIP_REMOTE_CHECK=true
+            shift
+            ;;
+        -k|--skip-signing)
+            SKIP_SIGNING=true
+            shift
+            ;;
+        -h|--help)
+            show_help
+            ;;
+        *)
+            echo "ERROR: Unknown option: $1" >&2
+            show_help
+            ;;
+    esac
+done
+
+# GPG signing checks
+if [[ "$SKIP_SIGNING" != true ]]; then
+    if [[ -z "$GPG_USER" ]]; then
+        echo "ERROR: --gpg-user is required for signing the release tarball." >&2
+        show_help
+    fi
+
+    if ! gpg --list-keys "$GPG_USER" > /dev/null 2>&1; then
+        echo "ERROR: GPG key '$GPG_USER' not found in your local keyring." >&2
+        echo "Please import or generate the key before proceeding." >&2
+        exit 1
+    fi
+else
+    echo "INFO: GPG signing has been intentionally skipped (--skip-signing)."
+fi
+
+if [[ -n "$REPO_ARG" ]]; then
+    if [[ ! -d "$REPO_ARG" || ! -f "$REPO_ARG/configure.ac" ]]; then
+        echo "ERROR: '$REPO_ARG' does not appear to be a valid Cloudberry source directory."
+        echo "Expected to find a 'configure.ac' file but it is missing."
+        echo
+        echo "Hint: Make sure you passed the correct --repo path to a valid Git clone."
+ exit 1 + fi + cd "$REPO_ARG" + + if [[ ! -d ".git" ]]; then + echo "ERROR: '$REPO_ARG' is not a valid Git repository." + exit 1 + fi + + if [[ "$SKIP_REMOTE_CHECK" != true ]]; then + REMOTE_URL=$(git config --get remote.origin.url || true) + if [[ "$REMOTE_URL" != "git@github.com:apache/cloudberry.git" ]]; then + echo "ERROR: remote.origin.url must be set to 'git@github.com:apache/cloudberry.git' for official releases." + echo " Found: '${REMOTE_URL:-}'" + echo + echo "This check ensures the release is being staged from the authoritative upstream repository." + echo "Use --skip-remote-check only if this is a fork or non-release automation." + exit 1 + fi + fi +fi + +# If --repo was not provided, ensure we are in a valid source directory +if [[ -z "$REPO_ARG" ]]; then + if [[ ! -f configure.ac || ! -f gpMgmt/bin/gppylib/gpversion.py || ! -f pom.xml ]]; then + echo "ERROR: You must run this script from the root of a valid Cloudberry Git clone" + echo " or pass the path using --repo ." + echo + echo "Missing one or more expected files:" + echo " - configure.ac" + echo " - gpMgmt/bin/gppylib/gpversion.py" + echo " - pom.xml" + exit 1 + fi +fi + +if ! $STAGE && [[ -z "$TAG" ]]; then + show_help +fi + +if $STAGE && [[ -z "$TAG" ]]; then + echo "ERROR: --tag (-t) is required when using --stage." >&2 + show_help +fi + +section "Validating Version Consistency" + +# Extract version from configure.ac +CONFIGURE_AC_VERSION=$(grep "^AC_INIT" configure.ac | sed -E "s/^AC_INIT\(\[[^]]+\], \[([^]]+)\].*/\1/") +CONFIGURE_AC_MAJOR=$(echo "$CONFIGURE_AC_VERSION" | cut -d. -f1) +EXPECTED="[$CONFIGURE_AC_MAJOR,99]" + +# Validate tag format +SEMVER_REGEX="^${CONFIGURE_AC_MAJOR}\\.[0-9]+\\.[0-9]+(-incubating(-rc[0-9]+)?)?$" +if ! [[ "$TAG" =~ $SEMVER_REGEX ]]; then + echo "ERROR: Tag '$TAG' does not match expected pattern for major version $CONFIGURE_AC_MAJOR (e.g., ${CONFIGURE_AC_MAJOR}.0.0-incubating or ${CONFIGURE_AC_MAJOR}.0.0-incubating-rc1)" + exit 1 +fi + +# Check gpversion.py consistency +PY_LINE=$(grep "^MAIN_VERSION" gpMgmt/bin/gppylib/gpversion.py | sed -E 's/#.*//' | tr -d '[:space:]') + +if [[ "$PY_LINE" != "MAIN_VERSION=$EXPECTED" ]]; then + echo "ERROR: gpversion.py MAIN_VERSION is $PY_LINE, but configure.ac suggests $EXPECTED" + echo "Please correct this mismatch before proceeding." + exit 1 +fi + +# For final releases (non-RC), ensure configure.ac version matches tag exactly +if [[ "$TAG" != *-rc* && "$CONFIGURE_AC_VERSION" != "$TAG" ]]; then + echo "ERROR: configure.ac version ($CONFIGURE_AC_VERSION) does not match final release tag ($TAG)" + echo "Please update configure.ac to match the tag before proceeding." + exit 1 +fi + +# Ensure the generated 'configure' script is up to date +CONFIGURE_VERSION_LINE=$(grep "^PACKAGE_VERSION=" configure || true) +CONFIGURE_VERSION=$(echo "$CONFIGURE_VERSION_LINE" | sed -E "s/^PACKAGE_VERSION='([^']+)'.*/\1/") + +if [[ "$CONFIGURE_VERSION" != "$TAG" ]]; then + echo "ERROR: Version in generated 'configure' script ($CONFIGURE_VERSION) does not match release tag ($TAG)." + echo "This likely means autoconf was not run after updating configure.ac." + exit 1 +fi + +# Ensure xmllint is available +if ! command -v xmllint >/dev/null 2>&1; then + echo "ERROR: xmllint is required but not installed." 
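+    # NOTE: xmllint typically ships with libxml2 (for example the
+    # 'libxml2-utils' package on Debian/Ubuntu); it is the only XML
+    # tooling this script depends on.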
+    exit 1
+fi
+
+# Extract version from pom.xml using xmllint with namespace stripping
+POM_VERSION=$(xmllint --xpath '//*[local-name()="project"]/*[local-name()="version"]/text()' pom.xml 2>/dev/null || true)
+
+if [[ -z "$POM_VERSION" ]]; then
+    echo "ERROR: Could not extract <version> from pom.xml"
+    exit 1
+fi
+
+if [[ "$POM_VERSION" != "$TAG" ]]; then
+    echo "ERROR: Version in pom.xml ($POM_VERSION) does not match release tag ($TAG)."
+    echo "Please update pom.xml before tagging."
+    exit 1
+fi
+
+# Ensure working tree is clean
+if ! git diff-index --quiet HEAD --; then
+    echo "ERROR: Working tree is not clean. Please commit or stash changes before proceeding."
+    exit 1
+fi
+
+echo "MAIN_VERSION verified"
+printf "  %-14s: %s\n" "Release Tag" "$TAG"
+printf "  %-14s: %s\n" "configure.ac" "$CONFIGURE_AC_VERSION"
+printf "  %-14s: %s\n" "configure" "$CONFIGURE_VERSION"
+printf "  %-14s: %s\n" "pom.xml" "$POM_VERSION"
+printf "  %-14s: %s\n" "gpversion.py" "${EXPECTED//[\[\]]}"
+
+section "Checking the state of the Tag"
+
+# Check if the tag already exists before making any changes
+if git rev-parse "$TAG" >/dev/null 2>&1; then
+    TAG_COMMIT=$(git rev-list -n 1 "$TAG")
+    HEAD_COMMIT=$(git rev-parse HEAD)
+
+    if [[ "$TAG_COMMIT" == "$HEAD_COMMIT" && "$FORCE_TAG_REUSE" == true ]]; then
+        echo "INFO: Tag '$TAG' already exists and matches HEAD. Proceeding with reuse."
+    elif [[ "$FORCE_TAG_REUSE" == true ]]; then
+        echo "ERROR: --force-tag-reuse was specified but tag '$TAG' does not match HEAD."
+        echo "       Tags must be immutable. Cannot continue."
+        exit 1
+    else
+        echo "ERROR: Tag '$TAG' already exists."
+        echo "       Pass --force-tag-reuse to reuse it; reuse is only allowed when HEAD matches the tag commit."
+        exit 1
+    fi
+elif [[ "$FORCE_TAG_REUSE" == true ]]; then
+    echo "ERROR: --force-tag-reuse was specified, but tag '$TAG' does not exist."
+    echo "       You can only reuse a tag if it already exists."
+    exit 1
+else
+    echo "INFO: Tag '$TAG' does not yet exist. It will be created during staging."
+fi
+
+# Check and display submodule initialization status
+if [ -s .gitmodules ]; then
+    section "Checking Git Submodules"
+
+    UNINITIALIZED=false
+    while read -r status path rest; do
+        if [[ "$status" == "-"* ]]; then
+            echo "Uninitialized: $path"
+            UNINITIALIZED=true
+        else
+            echo "Initialized  : $path"
+        fi
+    done < <(git submodule status)
+
+    if [[ "$UNINITIALIZED" == true ]]; then
+        echo
+        echo "ERROR: One or more Git submodules are not initialized."
+        echo "Please run:"
+        echo "    git submodule update --init --recursive"
+        echo "before proceeding with the release preparation."
+        exit 1
+    fi
+fi
+
+section "Checking GIT_USER_NAME and GIT_USER_EMAIL values"
+
+if $STAGE; then
+    # Validate Git environment before performing tag operation
+    GIT_USER_NAME=$(git config --get user.name || true)
+    GIT_USER_EMAIL=$(git config --get user.email || true)
+
+    echo "Git User Info:"
+    printf "  %-14s: %s\n" "user.name" "${GIT_USER_NAME:-}"
+    printf "  %-14s: %s\n" "user.email" "${GIT_USER_EMAIL:-}"
+
+    if [[ -z "$GIT_USER_NAME" || -z "$GIT_USER_EMAIL" ]]; then
+        echo "ERROR: Git configuration is incomplete."
+        echo
+        echo "  Detected:"
+        echo "    user.name  = ${GIT_USER_NAME:-}"
+        echo "    user.email = ${GIT_USER_EMAIL:-}"
+        echo
+        echo "  Git requires both to be set in order to create annotated tags for releases."
+ echo " You may configure them globally using:" + echo " git config --global user.name \"Your Name\"" + echo " git config --global user.email \"your@apache.org\"" + echo + echo " Alternatively, set them just for this repo using the same commands without --global." + exit 1 + fi + +section "Staging release: $TAG" + + if [[ "$FORCE_TAG_REUSE" == false ]]; then + confirm "You are about to create tag '$TAG'. Continue?" + git tag -a "$TAG" -m "Apache Cloudberry (Incubating) ${TAG} Release Candidate" + else + echo "INFO: Reusing existing tag '$TAG'; skipping tag creation." + fi + + echo "Creating BUILD_NUMBER file with value of 1" + echo "1" > BUILD_NUMBER + + echo -e "\nTag Summary" + TAG_OBJECT=$(git rev-parse "$TAG") + TAG_COMMIT=$(git rev-list -n 1 "$TAG") + echo "$TAG (tag object): $TAG_OBJECT" + echo " Points to commit: $TAG_COMMIT" + git log -1 --format="%C(auto)%h %d" "$TAG" + + section "Creating Source Tarball" + + TAR_NAME="apache-cloudberry-${TAG}-src.tar.gz" + TMP_DIR=$(mktemp -d) + trap 'rm -rf "$TMP_DIR"' EXIT + + git archive --format=tar --prefix="apache-cloudberry-${TAG}/" "$TAG" | tar -x -C "$TMP_DIR" + cp BUILD_NUMBER "$TMP_DIR/apache-cloudberry-${TAG}/" + + # Archive submodules if any + if [ -s .gitmodules ]; then + git submodule foreach --recursive --quiet " + echo \"Archiving submodule: \$sm_path\" + fullpath=\"\$toplevel/\$sm_path\" + destpath=\"$TMP_DIR/apache-cloudberry-$TAG/\$sm_path\" + mkdir -p \"\$destpath\" + git -C \"\$fullpath\" archive --format=tar --prefix=\"\$sm_path/\" HEAD | tar -x -C \"$TMP_DIR/apache-cloudberry-$TAG\" + " + fi + + tar -czf "$TAR_NAME" -C "$TMP_DIR" "apache-cloudberry-${TAG}" + rm -rf "$TMP_DIR" + echo -e "Archive saved to: $TAR_NAME" + + # Generate SHA-512 checksum + section "Generating SHA-512 Checksum" + + echo -e "\nGenerating SHA-512 checksum" + shasum -a 512 "$TAR_NAME" > "${TAR_NAME}.sha512" + echo "Checksum saved to: ${TAR_NAME}.sha512" + + section "Signing with GPG key: $GPG_USER" + # Conditionally generate GPG signature + if [[ "$SKIP_SIGNING" != true ]]; then + echo -e "\nSigning tarball with GPG key: $GPG_USER" + gpg --armor --detach-sign --local-user "$GPG_USER" "$TAR_NAME" + echo "GPG signature saved to: ${TAR_NAME}.asc" + else + echo "INFO: Skipping tarball signing as requested (--skip-signing)" + fi + + # Move artifacts to top-level artifacts directory + + ARTIFACTS_DIR="$(cd "$(dirname "$REPO_ARG")" && cd .. && pwd)/artifacts" + mkdir -p "$ARTIFACTS_DIR" + + section "Moving Artifacts to $ARTIFACTS_DIR" + + echo -e "\nMoving release artifacts to: $ARTIFACTS_DIR" + mv -vf "$TAR_NAME" "$ARTIFACTS_DIR/" + mv -vf "${TAR_NAME}.sha512" "$ARTIFACTS_DIR/" + [[ -f "${TAR_NAME}.asc" ]] && mv -vf "${TAR_NAME}.asc" "$ARTIFACTS_DIR/" + + section "Verifying sha512 ($ARTIFACTS_DIR/${TAR_NAME}.sha512) Release Artifact" + cd "$ARTIFACTS_DIR" + sha512sum -c "$ARTIFACTS_DIR/${TAR_NAME}.sha512" + + section "Verifying GPG Signature ($ARTIFACTS_DIR/${TAR_NAME}.asc) Release Artifact" + + if [[ "$SKIP_SIGNING" != true ]]; then + gpg --verify "${TAR_NAME}.asc" "$TAR_NAME" + else + echo "INFO: Signature verification skipped (--skip-signing). Signature is only available when generated via this script." 
+    fi
+
+    section "Release candidate for $TAG staged successfully"
+fi
diff --git a/devops/tools/elf_rockylinux_dependency_analyzer.py b/devops/tools/elf_rockylinux_dependency_analyzer.py
new file mode 100755
index 00000000000..593dd169aa6
--- /dev/null
+++ b/devops/tools/elf_rockylinux_dependency_analyzer.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+ELF Dependency Analyzer
+
+This script analyzes ELF (Executable and Linkable Format) binaries to determine their runtime
+package dependencies. It can process individual files or recursively analyze directories.
+
+The script provides information about:
+- Required packages and their versions
+- Missing libraries
+- Custom or non-RPM libraries
+- Other special cases
+
+It also groups packages by their high-level dependencies, which can be cached for performance.
+
+Usage:
+    python3 elf_rockylinux_dependency_analyzer.py [--rebuild-cache] <file_or_directory> [<file_or_directory> ...]
+
+The script will automatically determine if each argument is a file or directory and process accordingly.
+Use --rebuild-cache to force rebuilding of the high-level packages cache.
+
+Requirements:
+- Python 3.6+
+- prettytable (pip install prettytable)
+- python-dateutil (pip install python-dateutil)
+- ldd (usually pre-installed on Linux systems)
+- file (usually pre-installed on Linux systems)
+- rpm (usually pre-installed on RPM-based Linux distributions)
+- repoquery (part of yum-utils package)
+
+Functions:
+- check_requirements(): Checks if all required commands are available.
+- run_command(command): Executes a shell command and returns its output.
+- parse_ldd_line(line): Parses a line of ldd output to extract the library name.
+- find_library_in_ld_library_path(lib_name): Searches for a library in LD_LIBRARY_PATH.
+- get_package_info(lib_path): Gets package information for a given library.
+- get_package_dependencies(package): Gets dependencies of a package using repoquery.
+- build_high_level_packages(grand_summary): Builds a mapping of high-level packages to their dependencies.
+- load_or_build_high_level_packages(grand_summary, force_rebuild): Loads or builds the high-level packages cache.
+- print_summary(packages, special_cases, missing_libraries, binary_path): Prints a summary for a single binary.
+- process_binary(binary_path): Processes a single binary file.
+- is_elf_binary(file_path): Checks if a file is an ELF binary.
+- print_grand_summary(...): Prints a grand summary of all processed binaries.
+- analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries): Analyzes a file or directory.
+- main(): Main function to handle command-line arguments and initiate the analysis.
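+
+Example (illustrative; the path is only an example, any ELF file or directory works):
+    # Analyze an install tree recursively and force a cache rebuild
+    python3 elf_rockylinux_dependency_analyzer.py --rebuild-cache /usr/local/cloudberry-db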
+
+This script is designed to help system administrators and developers understand the dependencies
+of ELF binaries in their systems, which can be useful for troubleshooting, optimizing, or
+preparing deployment packages.
+"""
+
+import os
+import subprocess
+import re
+import sys
+import json
+import shutil
+import argparse
+from collections import defaultdict
+from datetime import datetime, timedelta
+
+from prettytable import PrettyTable
+from dateutil import parser
+
+CACHE_FILE = 'high_level_packages_cache.json'
+CACHE_EXPIRY_DAYS = 7
+
+def check_requirements():
+    required_commands = ['ldd', 'file', 'rpm', 'repoquery']
+    missing_commands = [cmd for cmd in required_commands if shutil.which(cmd) is None]
+    if missing_commands:
+        print("Error: The following required commands are missing:")
+        for cmd in missing_commands:
+            print(f"  - {cmd}")
+        print("\nPlease install these commands and try again.")
+        if 'repoquery' in missing_commands:
+            print("Note: 'repoquery' is typically part of the 'yum-utils' package.")
+        sys.exit(1)
+
+def run_command(command):
+    try:
+        return subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8')
+    except subprocess.CalledProcessError as e:
+        print(f"Error running command {' '.join(command)}: {e.output.decode('utf-8').strip()}")
+        return None
+
+def parse_ldd_line(line):
+    match = re.search(r'\s*(\S+) => (\S+) \((0x[0-9a-f]+)\)', line)
+    return match.group(1) if match else None
+
+def find_library_in_ld_library_path(lib_name):
+    ld_library_path = os.environ.get('LD_LIBRARY_PATH', '')
+    for directory in ld_library_path.split(':'):
+        potential_path = os.path.join(directory, lib_name)
+        if os.path.isfile(potential_path):
+            return potential_path
+    return None
+
+def get_package_info(lib_path):
+    if not os.path.isfile(lib_path):
+        lib_name = os.path.basename(lib_path)
+        lib_path = find_library_in_ld_library_path(lib_name)
+        if not lib_path:
+            return None
+    try:
+        full_package_name = run_command(['rpm', '-qf', lib_path])
+        if full_package_name:
+            package_name = full_package_name.split('-')[0]
+            return package_name, full_package_name.strip()
+    except subprocess.CalledProcessError:
+        pass
+    return None
+
+def get_package_dependencies(package):
+    try:
+        output = subprocess.check_output(['repoquery', '--requires', '--resolve', package],
+                                         universal_newlines=True, stderr=subprocess.DEVNULL)
+        return set(output.strip().split('\n'))
+    except subprocess.CalledProcessError:
+        return set()
+
+def build_high_level_packages(grand_summary):
+    all_packages = set()
+    for packages in grand_summary.values():
+        all_packages.update(package.split('-')[0] for package in packages)
+    high_level_packages = {}
+    for package in all_packages:
+        deps = get_package_dependencies(package)
+        if deps:
+            high_level_packages[package] = [dep.split('-')[0] for dep in deps]
+    return high_level_packages
+
+def load_or_build_high_level_packages(grand_summary, force_rebuild=False):
+    if not force_rebuild and os.path.exists(CACHE_FILE):
+        with open(CACHE_FILE, 'r') as f:
+            cache_data = json.load(f)
+        if datetime.now() - parser.parse(cache_data['timestamp']) < timedelta(days=CACHE_EXPIRY_DAYS):
+            return cache_data['packages']
+    packages = build_high_level_packages(grand_summary)
+    with open(CACHE_FILE, 'w') as f:
+        json.dump({'timestamp': datetime.now().isoformat(), 'packages': packages}, f)
+    return packages
+
+def print_summary(packages, special_cases, missing_libraries, binary_path):
+    print("\nSummary of unique runtime packages required:")
+    table = PrettyTable(['Package Name', 'Full Package Name'])
+    table.align['Package Name'] = 'l'
+    table.align['Full Package Name'] = 'l'
+    unique_packages = sorted(set(packages))
+    for package_name, full_package_name in unique_packages:
+        table.add_row([package_name, full_package_name])
+    print(table)
+    if missing_libraries:
+        print("\nMISSING LIBRARIES:")
+        missing_table = PrettyTable(['Missing Library', 'Referenced By'])
+        missing_table.align['Missing Library'] = 'l'
+        missing_table.align['Referenced By'] = 'l'
+        for lib in missing_libraries:
+            missing_table.add_row([lib, binary_path])
+        print(missing_table)
+    if special_cases:
+        print("\nSPECIAL CASES:")
+        special_table = PrettyTable(['Library/Case', 'Referenced By', 'Category'])
+        special_table.align['Library/Case'] = 'l'
+        special_table.align['Referenced By'] = 'l'
+        special_table.align['Category'] = 'l'
+        for case in special_cases:
+            category = "Custom/Non-RPM" if "custom or non-RPM library" in case else "Other"
+            library = case.split(" is ")[0] if " is " in case else case
+            special_table.add_row([library, binary_path, category])
+        print(special_table)
+    else:
+        print("\nSPECIAL CASES: None found")
+
+def process_binary(binary_path):
+    print(f"Binary: {binary_path}\n")
+    print("Libraries and their corresponding packages:")
+    packages, special_cases, missing_libraries = [], [], []
+    known_special_cases = ['linux-vdso.so.1', 'ld-linux-x86-64.so.2']
+    ldd_output = run_command(['ldd', binary_path])
+    if ldd_output is None:
+        return packages, special_cases, missing_libraries
+    for line in ldd_output.splitlines():
+        if any(special in line for special in known_special_cases):
+            continue
+        parts = line.split('=>')
+        lib_name = parts[0].strip()
+        if "not found" in line:
+            missing_libraries.append(lib_name)
+            print(f"MISSING: {line.strip()}")
+        elif len(parts) > 1:
+            lib_path = parts[1].split()[0]
+            if lib_path != "not":
+                package_info = get_package_info(lib_path)
+                if package_info:
+                    print(f"{lib_path} => {package_info[1]}")
+                    packages.append(package_info)
+                elif os.path.exists(lib_path):
+                    special_case = f"{lib_path} is a custom or non-RPM library"
+                    special_cases.append(special_case)
+                    print(f"{lib_path} => Custom or non-RPM library")
+                else:
+                    special_case = f"{lib_path} is not found and might be a special case"
+                    special_cases.append(special_case)
+                    print(f"{lib_path} => Not found, might be a special case")
+            else:
+                special_case = f"{line.strip()} is a special case or built-in library"
+                special_cases.append(special_case)
+                print(f"{line.strip()} => Special case or built-in library")
+        else:
+            special_case = f"{line.strip()} is a special case or built-in library"
+            special_cases.append(special_case)
+            print(f"{line.strip()} => Special case or built-in library")
+    if special_cases:
+        print(f"Special cases found for {binary_path}:")
+        for case in special_cases:
+            print(f"  - {case}")
+    else:
+        print(f"No special cases found for {binary_path}")
+    print_summary(packages, special_cases, missing_libraries, binary_path)
+    print("-------------------------------------------")
+    return packages, special_cases, missing_libraries
+
+def is_elf_binary(file_path):
+    file_output = run_command(['file', file_path])
+    # run_command returns None when 'file' fails; treat that as "not ELF"
+    if file_output is None:
+        return False
+    return 'ELF' in file_output and ('executable' in file_output or 'shared object' in file_output)
+
+def print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries, HIGH_LEVEL_PACKAGES, PACKAGE_TO_HIGH_LEVEL):
+    if grand_summary or grand_special_cases or grand_missing_libraries:
+        print("\nGrand Summary of high-level runtime packages required across all binaries:")
+        high_level_summary = defaultdict(set)
+        for package_name, full_package_names in grand_summary.items():
+            high_level_package = PACKAGE_TO_HIGH_LEVEL.get(package_name.split('-')[0], package_name.split('-')[0])
+            high_level_summary[high_level_package].update(full_package_names)
+        table = PrettyTable(['High-Level Package', 'Included Packages'])
+        table.align['High-Level Package'] = 'l'
+        table.align['Included Packages'] = 'l'
+        for high_level_package, full_package_names in sorted(high_level_summary.items()):
+            included_packages = '\n'.join(sorted(full_package_names))
+            table.add_row([high_level_package, included_packages])
+        print(table)
+        if grand_missing_libraries:
+            print("\nGrand Summary of MISSING LIBRARIES across all binaries:")
+            missing_table = PrettyTable(['Missing Library', 'Referenced By'])
+            missing_table.align['Missing Library'] = 'l'
+            missing_table.align['Referenced By'] = 'l'
+            for lib, binaries in sorted(grand_missing_libraries.items()):
+                missing_table.add_row([lib, '\n'.join(sorted(binaries))])
+            print(missing_table)
+        print("\nGrand Summary of special cases across all binaries:")
+        if grand_special_cases:
+            special_table = PrettyTable(['Library/Case', 'Referenced By', 'Category'])
+            special_table.align['Library/Case'] = 'l'
+            special_table.align['Referenced By'] = 'l'
+            special_table.align['Category'] = 'l'
+            for case, binary in sorted(set(grand_special_cases)):
+                category = "Custom/Non-RPM" if "custom or non-RPM library" in case else "Other"
+                library = case.split(" is ")[0] if " is " in case else case
+                special_table.add_row([library, binary, category])
+            print(special_table)
+        else:
+            print("No special cases found.")
+
+def analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries):
+    if os.path.isfile(path):
+        packages, special_cases, missing_libraries = process_binary(path)
+        for package_name, full_package_name in packages:
+            grand_summary[package_name].add(full_package_name)
+        grand_special_cases.extend((case, path) for case in special_cases)
+        for lib in missing_libraries:
+            grand_missing_libraries[lib].add(path)
+    elif os.path.isdir(path):
+        for root, dirs, files in os.walk(path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                if is_elf_binary(file_path):
+                    packages, special_cases, missing_libraries = process_binary(file_path)
+                    for package_name, full_package_name in packages:
+                        grand_summary[package_name].add(full_package_name)
+                    grand_special_cases.extend((case, file_path) for case in special_cases)
+                    for lib in missing_libraries:
+                        grand_missing_libraries[lib].add(file_path)
+    else:
+        print(f"Error: {path} is neither a valid file nor a directory.")
+    if grand_special_cases:
+        print(f"Accumulated special cases after processing {path}:")
+        for case, binary in grand_special_cases:
+            print(f"  - {case} (in {binary})")
+    else:
+        print(f"No special cases accumulated after processing {path}")
+
+def main():
+    check_requirements()
+    # Named arg_parser so it does not shadow the module-level dateutil 'parser'
+    arg_parser = argparse.ArgumentParser(description="ELF Dependency Analyzer")
+    arg_parser.add_argument('paths', nargs='+', help="Paths to files or directories to analyze")
+    arg_parser.add_argument('--rebuild-cache', action='store_true', help="Force rebuild of the high-level packages cache")
+    args = arg_parser.parse_args()
+    grand_summary = defaultdict(set)
+    grand_special_cases = []
+    grand_missing_libraries = defaultdict(set)
+    for path in args.paths:
+        analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries)
+    HIGH_LEVEL_PACKAGES = load_or_build_high_level_packages(grand_summary, args.rebuild_cache)
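+    # Invert the cached mapping so each dependency points back at the
+    # high-level package that pulled it in. Illustrative shape (names are
+    # hypothetical, not real repoquery output):
+    #     {'openssl': ['zlib', 'glibc']}  ->  {'zlib': 'openssl', 'glibc': 'openssl'}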
+    PACKAGE_TO_HIGH_LEVEL = {low: high for high, lows in HIGH_LEVEL_PACKAGES.items() for low in lows}
+    print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries, HIGH_LEVEL_PACKAGES, PACKAGE_TO_HIGH_LEVEL)
+
+if __name__ == '__main__':
+    main()
diff --git a/devops/tools/elf_ubuntu_dependency_analyzer.py b/devops/tools/elf_ubuntu_dependency_analyzer.py
new file mode 100755
index 00000000000..a1741f7f888
--- /dev/null
+++ b/devops/tools/elf_ubuntu_dependency_analyzer.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+ELF Dependency Analyzer for Ubuntu
+
+This script analyzes ELF (Executable and Linkable Format) binaries to determine their runtime
+package dependencies on Ubuntu systems. It can process individual files or recursively analyze directories.
+
+The script provides information about:
+- Required packages and their versions
+- Custom or non-APT libraries
+- Core system libraries
+- Missing libraries
+- Other special cases
+
+Usage:
+    python3 elf_ubuntu_dependency_analyzer.py <file_or_directory> [<file_or_directory> ...]
+
+Requirements:
+- Python 3.6+
+- prettytable (pip install prettytable)
+- ldd (usually pre-installed on Linux systems)
+- file (usually pre-installed on Linux systems)
+- dpkg (pre-installed on Ubuntu)
+"""
+
+import os
+import subprocess
+import argparse
+from collections import defaultdict
+from prettytable import PrettyTable
+
+def run_command(command):
+    """
+    Execute a shell command and return its output.
+
+    Args:
+        command (list): The command to execute as a list of strings.
+
+    Returns:
+        str: The output of the command, or None if an error occurred.
+    """
+    try:
+        return subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8')
+    except subprocess.CalledProcessError as e:
+        print(f"Error running command {' '.join(command)}: {e.output.decode('utf-8').strip()}")
+        return None

+def get_package_info(lib_path):
+    """
+    Get package information for a given library path.
+
+    Args:
+        lib_path (str): The path to the library.
+
+    Returns:
+        tuple: A tuple containing the package name and full package information.
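+
+    Example (illustrative, not captured dpkg output):
+        '/lib/x86_64-linux-gnu/libz.so.1' would typically resolve via
+        `dpkg -S` to something like ('zlib1g', 'zlib1g:amd64: /lib/...'),
+        while anything under /usr/local/cloudberry-db is reported as
+        ('cloudberry-custom', 'Cloudberry custom library: <path>').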
+ """ + if lib_path.startswith('/usr/local/cloudberry-db'): + return "cloudberry-custom", f"Cloudberry custom library: {lib_path}" + + dpkg_output = run_command(['dpkg', '-S', lib_path]) + if dpkg_output: + package_name = dpkg_output.split(':')[0] + return package_name, dpkg_output.strip() + + # List of core system libraries that might not be individually tracked by dpkg + core_libs = { + 'libc.so': 'libc6', + 'libm.so': 'libc6', + 'libdl.so': 'libc6', + 'libpthread.so': 'libc6', + 'libresolv.so': 'libc6', + 'librt.so': 'libc6', + 'libgcc_s.so': 'libgcc-s1', + 'libstdc++.so': 'libstdc++6', + 'libz.so': 'zlib1g', + 'libbz2.so': 'libbz2-1.0', + 'libpam.so': 'libpam0g', + 'libaudit.so': 'libaudit1', + 'libcap-ng.so': 'libcap-ng0', + 'libkeyutils.so': 'libkeyutils1', + 'liblzma.so': 'liblzma5', + 'libcom_err.so': 'libcomerr2' + } + + lib_name = os.path.basename(lib_path) + for core_lib, package in core_libs.items(): + if lib_name.startswith(core_lib): + return package, f"Core system library: {lib_path}" + + # If not a recognized core library, return as system library + file_output = run_command(['file', lib_path]) + if file_output: + return "system-library", f"System library: {lib_path} - {file_output.strip()}" + + return None + +def print_summary(packages, special_cases, missing_libraries, binary_path): + """ + Print a summary of the dependencies for a binary. + + Args: + packages (list): List of package tuples (package_name, full_package_name). + special_cases (list): List of special case strings. + missing_libraries (list): List of missing library names. + binary_path (str): Path to the binary being analyzed. + """ + print("\nSummary of runtime dependencies:") + table = PrettyTable(['Category', 'Package/Library', 'Details']) + table.align['Category'] = 'l' + table.align['Package/Library'] = 'l' + table.align['Details'] = 'l' + + categories = { + 'cloudberry-custom': 'Cloudberry Custom', + 'system-library': 'System Library', + } + + for package_name, full_package_name in sorted(set(packages)): + category = categories.get(package_name, 'System Package') + table.add_row([category, package_name, full_package_name]) + + print(table) + + if missing_libraries: + print("\nMISSING LIBRARIES:") + for lib in missing_libraries: + print(f" - {lib}") + + if special_cases: + print("\nSPECIAL CASES:") + for case in special_cases: + print(f" - {case}") + +def process_binary(binary_path): + """ + Process a single binary file to determine its dependencies. + + Args: + binary_path (str): Path to the binary file. + + Returns: + tuple: A tuple containing lists of packages, special cases, and missing libraries. 
+ """ + print(f"Binary: {binary_path}\n") + print("Libraries and their corresponding packages:") + packages, special_cases, missing_libraries = [], [], [] + + ldd_output = run_command(['ldd', binary_path]) + if ldd_output is None: + return packages, special_cases, missing_libraries + + for line in ldd_output.splitlines(): + if "=>" not in line: + continue + + parts = line.split('=>') + lib_name = parts[0].strip() + lib_path = parts[1].split()[0].strip() + lib_path = os.path.realpath(lib_path) + + if lib_path == "not": + missing_libraries.append(lib_name) + print(f"MISSING: {line.strip()}") + else: + package_info = get_package_info(lib_path) + if package_info: + print(f"{lib_path} => {package_info[1]}") + packages.append(package_info) + else: + special_case = f"{lib_path} is not found and might be a special case" + special_cases.append(special_case) + print(f"{lib_path} => Not found, might be a special case") + + print_summary(packages, special_cases, missing_libraries, binary_path) + print("-------------------------------------------") + return packages, special_cases, missing_libraries + +def is_elf_binary(file_path): + """ + Check if a file is an ELF binary. + + Args: + file_path (str): Path to the file. + + Returns: + bool: True if the file is an ELF binary, False otherwise. + """ + file_output = run_command(['file', file_path]) + return 'ELF' in file_output and ('executable' in file_output or 'shared object' in file_output) + +def print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries): + """ + Print a grand summary of all analyzed binaries. + + Args: + grand_summary (dict): Dictionary of all packages and their details. + grand_special_cases (list): List of all special cases. + grand_missing_libraries (dict): Dictionary of all missing libraries. + """ + if grand_summary or grand_special_cases or grand_missing_libraries: + print("\nGrand Summary of runtime packages required across all binaries:") + table = PrettyTable(['Package', 'Included Packages']) + table.align['Package'] = 'l' + table.align['Included Packages'] = 'l' + for package_name, full_package_names in sorted(grand_summary.items()): + included_packages = '\n'.join(sorted(full_package_names)) + table.add_row([package_name, included_packages]) + print(table) + + if grand_missing_libraries: + print("\nGrand Summary of MISSING LIBRARIES across all binaries:") + missing_table = PrettyTable(['Missing Library', 'Referenced By']) + missing_table.align['Missing Library'] = 'l' + missing_table.align['Referenced By'] = 'l' + for lib, binaries in sorted(grand_missing_libraries.items()): + missing_table.add_row([lib, '\n'.join(sorted(binaries))]) + print(missing_table) + + print("\nGrand Summary of special cases across all binaries:") + if grand_special_cases: + special_table = PrettyTable(['Library/Case', 'Referenced By', 'Category']) + special_table.align['Library/Case'] = 'l' + special_table.align['Referenced By'] = 'l' + special_table.align['Category'] = 'l' + for case, binary in sorted(set(grand_special_cases)): + category = "System Library" if "system library" in case else "Other" + library = case.split(" is ")[0] if " is " in case else case + special_table.add_row([library, binary, category]) + print(special_table) + else: + print("No special cases found.") + +def analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries): + """ + Analyze a file or directory for ELF binaries and their dependencies. + + Args: + path (str): Path to the file or directory to analyze. 
+        grand_summary (dict): Dictionary to store all package information.
+        grand_special_cases (list): List to store all special cases.
+        grand_missing_libraries (dict): Dictionary to store all missing libraries.
+    """
+    if os.path.isfile(path):
+        if is_elf_binary(path):
+            packages, special_cases, missing_libraries = process_binary(path)
+            for package_name, full_package_name in packages:
+                grand_summary[package_name].add(full_package_name)
+            grand_special_cases.extend((case, path) for case in special_cases)
+            for lib in missing_libraries:
+                grand_missing_libraries[lib].add(path)
+    elif os.path.isdir(path):
+        for root, dirs, files in os.walk(path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                if is_elf_binary(file_path):
+                    packages, special_cases, missing_libraries = process_binary(file_path)
+                    for package_name, full_package_name in packages:
+                        grand_summary[package_name].add(full_package_name)
+                    grand_special_cases.extend((case, file_path) for case in special_cases)
+                    for lib in missing_libraries:
+                        grand_missing_libraries[lib].add(file_path)
+    else:
+        print(f"Error: {path} is neither a valid file nor a directory.")
+
+def main():
+    """
+    Main function to handle command-line arguments and initiate the analysis.
+    """
+    parser = argparse.ArgumentParser(description="ELF Dependency Analyzer for Ubuntu")
+    parser.add_argument('paths', nargs='+', help="Paths to files or directories to analyze")
+    args = parser.parse_args()
+
+    grand_summary = defaultdict(set)
+    grand_special_cases = []
+    grand_missing_libraries = defaultdict(set)
+
+    for path in args.paths:
+        analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries)
+
+    print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries)
+
+if __name__ == '__main__':
+    main()
diff --git a/devops/tools/s3-repo-sync-and-sign.sh b/devops/tools/s3-repo-sync-and-sign.sh
new file mode 100755
index 00000000000..1cd037749c6
--- /dev/null
+++ b/devops/tools/s3-repo-sync-and-sign.sh
@@ -0,0 +1,266 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+# Description:
+# This script automates several tasks related to managing RPM repositories in AWS S3.
+# It handles the following operations:
+#   1. Syncing an RPM repository from an S3 bucket to a local directory.
+#   2. Signing all RPMs in the local repository with a specified GPG key.
+#   3. Updating and signing the repository metadata.
+#   4. Exporting the GPG public key and placing it in the repository for client use.
+#   5. Optionally, uploading changes back to the S3 bucket and deleting files in S3 that no longer exist locally.
+#   6. Decrypting and importing a GPG private key used for signing.
+#   7. A mode to only decrypt and import the GPG private key.
+#   8. Identifying and copying a newly built RPM to the appropriate repository.
+
+# Function to display detailed usage information
+usage() {
+    cat << EOF
+Usage: $0 [OPTIONS]
+
+This script automates several tasks related to managing RPM repositories in AWS S3.
+It can be used to sync repositories from S3, sign RPMs with a GPG key, update and sign repository metadata,
+and optionally upload changes back to S3.
+
+Options:
+  -c                      Configure AWS credentials using 'aws configure'.
+  -s <s3-bucket-path>     Specify the S3 bucket and path to sync (required for S3 operations).
+  -d <local-dir>          Specify the local directory to sync to (default: ~/repo).
+  -k <key-file>           Specify the encrypted GPG private key file to import (optional).
+  -g <gpg-key-id>         Specify the GPG key ID or email to use for signing (required for signing operations).
+  --upload-with-delete    Sync local changes to S3, deleting files in S3 that no longer exist locally.
+  --s3-sync-only          Perform only the S3 sync to the local directory, inform the user, and exit.
+  --import-gpg-key-only   Decrypt and import the GPG private key, then exit. No other operations will be performed.
+  --copy-new-rpm          Copy the newly built RPM(s) to the appropriate repository directory based on architecture and version.
+  -h, --help              Display this help message and exit.
+
+Examples:
+  # Sync an S3 repository to a local directory and sign RPMs with a GPG key
+  $0 -s s3://mybucket/repo -g mygpgkey@example.com
+
+  # Sync an S3 repository only, without signing RPMs or performing other operations
+  $0 -s s3://mybucket/repo --s3-sync-only
+
+  # Decrypt and import a GPG private key, then exit
+  $0 -k ~/path/to/encrypted-gpg-key.asc --import-gpg-key-only
+
+  # Copy newly built RPMs to the appropriate repository and sign them
+  $0 --copy-new-rpm -g mygpgkey@example.com
+
+Notes:
+  - The -s option is required for any operation that interacts with S3, such as syncing or uploading with delete.
+  - The -g option is required for any operation that involves signing RPMs or repository metadata.
+  - When using --upload-with-delete, ensure that you have the necessary permissions to delete objects in the specified S3 bucket.
+  - If you only want to perform local operations (e.g., copying RPMs, signing), you do not need to specify the -s option.
+
+EOF
+}
+
+# Parse options and arguments
+GPG_KEY_ID=""
+UPLOAD_WITH_DELETE=false
+S3_SYNC_ONLY=false
+IMPORT_GPG_KEY_ONLY=false
+COPY_NEW_RPM=false
+CONFIGURE_AWS=false
+LOCAL_DIR=~/repo
+
+# Function to check if required commands are available
+check_commands() {
+    local cmds=("aws" "gpg" "shred" "createrepo" "rpm" "find")
+    for cmd in "${cmds[@]}"; do
+        if ! command -v "$cmd" &> /dev/null; then
+            echo "Error: Required command '$cmd' not found. Please install it before running the script."
+            exit 1
+        fi
+    done
+}
+
+# Parse options
+while [[ "$#" -gt 0 ]]; do
+    case $1 in
+        -c) CONFIGURE_AWS=true; shift ;;
+        -s) S3_BUCKET="$2"; shift 2 ;;
+        -d) LOCAL_DIR="$2"; shift 2 ;;
+        -k) ENCRYPTED_KEY_FILE="$2"; shift 2 ;;
+        -g) GPG_KEY_ID="$2"; shift 2 ;;
+        --upload-with-delete) UPLOAD_WITH_DELETE=true; shift ;;
+        --s3-sync-only) S3_SYNC_ONLY=true; shift ;;
+        --import-gpg-key-only) IMPORT_GPG_KEY_ONLY=true; shift ;;
+        --copy-new-rpm) COPY_NEW_RPM=true; shift ;;
+        -h|--help) usage; exit 0 ;;
+        *) echo "Unknown option: $1"; usage; exit 1 ;;
+    esac
+done
+
+check_commands
+
+# AWS credentials configuration (optional)
+if [ "$CONFIGURE_AWS" = true ]; then
+    echo "Configuring AWS credentials..."
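+    # 'aws configure' is interactive: it prompts for the access key id,
+    # secret access key, default region, and output format.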
+    aws configure
+fi
+
+# Decrypt and import the GPG private key whenever one was provided
+if [ -n "${ENCRYPTED_KEY_FILE:-}" ]; then
+    DECRYPTED_KEY_FILE="${ENCRYPTED_KEY_FILE%.*}"
+    echo "Decrypting GPG private key..."
+    gpg --decrypt --output "$DECRYPTED_KEY_FILE" "$ENCRYPTED_KEY_FILE"
+
+    # Check if the key is already imported
+    if gpg --list-keys | grep -q "$GPG_KEY_ID"; then
+        echo "GPG key already imported."
+    else
+        gpg --import "$DECRYPTED_KEY_FILE"
+    fi
+
+    # Securely delete the decrypted key file
+    shred -u "$DECRYPTED_KEY_FILE"
+
+    # Exit if only importing GPG key
+    if [ "$IMPORT_GPG_KEY_ONLY" = true ]; then
+        echo "GPG key has been decrypted and imported successfully. Exiting."
+        exit 0
+    fi
+fi
+
+# Check access to the S3 bucket and perform the sync for a full run or for --s3-sync-only
+if [ "$IMPORT_GPG_KEY_ONLY" = false ] && [ "$COPY_NEW_RPM" = false ] && [ "$UPLOAD_WITH_DELETE" = false ]; then
+    if [ -z "${S3_BUCKET:-}" ]; then
+        echo "Error: S3 bucket (-s) is required."
+        exit 1
+    fi
+
+    echo "Checking access to S3 bucket $S3_BUCKET..."
+    if ! aws s3 ls "$S3_BUCKET" &> /dev/null; then
+        echo "Error: Unable to access S3 bucket $S3_BUCKET. Please check your AWS credentials and permissions."
+        exit 1
+    fi
+
+    # Sync the S3 repository to the local directory
+    mkdir -p "$LOCAL_DIR"
+    echo "Syncing S3 repository from $S3_BUCKET to $LOCAL_DIR..."
+    aws s3 sync "$S3_BUCKET" "$LOCAL_DIR"
+
+    # Stop here when the caller asked for --s3-sync-only
+    if [ "$S3_SYNC_ONLY" = true ]; then
+        echo "S3 sync operation completed successfully."
+        exit 0
+    fi
+fi
+
+# Copy the newly built RPM to the appropriate repository
+if [ "$COPY_NEW_RPM" = true ]; then
+    echo "Identifying the newly built RPMs..."
+
+    for ARCH in x86_64 noarch; do
+        RPM_DIR=~/rpmbuild/RPMS/$ARCH
+
+        # Check if the RPM directory exists
+        if [ ! -d "$RPM_DIR" ]; then
+            echo "Warning: Directory $RPM_DIR does not exist. Skipping $ARCH."
+            continue
+        fi
+
+        # Find all matching RPMs and copy them to the appropriate repository directory
+        NEW_RPMS=$(find "$RPM_DIR" -name "cloudberry-*.rpm" ! -name "*debuginfo*.rpm")
+        if [ -n "$NEW_RPMS" ]; then
+            for NEW_RPM in $NEW_RPMS; do
+                # Determine the repository (el8 or el9) based on the RPM filename
+                if echo "$NEW_RPM" | grep -q "\.el8\."; then
+                    TARGET_REPO="$LOCAL_DIR/el8/$ARCH"
+                elif echo "$NEW_RPM" | grep -q "\.el9\."; then
+                    TARGET_REPO="$LOCAL_DIR/el9/$ARCH"
+                else
+                    echo "Error: Unable to determine the correct repository for $NEW_RPM. Exiting."
+                    exit 1
+                fi
+
+                # Ensure the target repository directory exists
+                mkdir -p "$TARGET_REPO"
+
+                # Copy the RPM to the target repository
+                echo "Copying $NEW_RPM to $TARGET_REPO..."
+                cp "$NEW_RPM" "$TARGET_REPO/"
+                echo "Copy operation completed."
+            done
+        else
+            echo "No matching RPMs found in $RPM_DIR."
+        fi
+    done
+fi
+
+# Define the directories for `el8` and `el9` repositories
+REPO_DIRS=("$LOCAL_DIR/el8/x86_64" "$LOCAL_DIR/el8/noarch" "$LOCAL_DIR/el9/x86_64" "$LOCAL_DIR/el9/noarch")
+
+# Traverse each repository directory (el8 and el9) and sign RPMs
+for REPO_DIR in "${REPO_DIRS[@]}"; do
+    if [ -d "$REPO_DIR" ]; then
+        echo "Processing repository at $REPO_DIR..."
+
+        # Export GPG public key for clients and place it in the root of the repository
+        TEMP_GPG_KEY=$(mktemp)
+        echo "Exporting GPG public key to temporary location..."
+        gpg --armor --export "$GPG_KEY_ID" > "$TEMP_GPG_KEY"
+
+        # Import the GPG public key to RPM database
+        echo "Importing GPG public key into RPM database..."
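+        # Importing the public key into the local RPM database is what lets
+        # the 'rpm -Kv' verification below recognize the new signatures;
+        # without it, signed packages would be reported with NOKEY.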
+        sudo rpm --import "$TEMP_GPG_KEY"
+
+        # Sign each RPM in the directory
+        echo "Signing RPM packages in $REPO_DIR..."
+        find "$REPO_DIR" -name "*.rpm" -exec rpm --addsign --define "_gpg_name $GPG_KEY_ID" {} \;
+
+        # Verify that RPMs were signed successfully
+        echo "Verifying RPM signatures in $REPO_DIR..."
+        find "$REPO_DIR" -name "*.rpm" -exec rpm -Kv {} \;
+
+        # Recreate the repository metadata
+        echo "Updating repository metadata in $REPO_DIR..."
+        createrepo --update "$REPO_DIR"
+
+        # Sign the repository metadata, automatically overwriting if the file already exists
+        echo "Signing repository metadata in $REPO_DIR..."
+        gpg --batch --yes --detach-sign --armor --local-user "$GPG_KEY_ID" "$REPO_DIR/repodata/repomd.xml"
+
+        # Copy the public key to each repo
+        cp "$TEMP_GPG_KEY" "$REPO_DIR/RPM-GPG-KEY-cloudberry"
+
+        # Clean up temporary GPG key
+        rm -f "$TEMP_GPG_KEY"
+    else
+        echo "Warning: Repository directory $REPO_DIR does not exist. Skipping..."
+    fi
+done
+
+# Upload changes to S3 with --delete option if requested
+if [ "$UPLOAD_WITH_DELETE" = true ]; then
+    if [ -z "${S3_BUCKET:-}" ]; then
+        echo "Error: S3 bucket (-s) is required for upload with delete."
+        exit 1
+    fi
+
+    echo "Uploading local changes to S3 with --delete option..."
+    aws s3 sync "$LOCAL_DIR" "$S3_BUCKET" --delete
+    echo "S3 sync with --delete completed."
+fi
+
+# Print completion message
+echo "S3 repository sync, RPM signing, metadata signing, and public key export completed successfully."
diff --git a/getversion b/getversion
index 76f6984ada5..b4c29112651 100755
--- a/getversion
+++ b/getversion
@@ -94,7 +94,7 @@ generate_dev_version() {
 }
 
 # Check if we're in a Git repo and git is available
-if type git >/dev/null 2>&1 && [ -d '.git' ]; then
+if type git >/dev/null 2>&1 && [ -e '.git' ]; then
     # Ensure git describe doesn't fail due to shallow clone
     if git describe --tags --long >/dev/null 2>&1; then
         if git describe --exact-match >/dev/null 2>&1; then
diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile
index 3c4b6d2b031..c5eb6ccba9c 100644
--- a/gpMgmt/bin/Makefile
+++ b/gpMgmt/bin/Makefile
@@ -90,12 +90,27 @@ PYYAML_VERSION=5.4.1
 download-python-deps:
 	@echo "--- Downloading Python dependencies for gpMgmt modules"
 	@mkdir -p $(PYLIB_SRC_EXT)
-	# Download psutil using curl
-	curl -sSL https://files.pythonhosted.org/packages/source/p/psutil/psutil-$(PSUTIL_VERSION).tar.gz -o $(PYLIB_SRC_EXT)/psutil-$(PSUTIL_VERSION).tar.gz
-	# Download PyYAML using curl
-	curl -sSL https://files.pythonhosted.org/packages/source/P/PyYAML/PyYAML-$(PYYAML_VERSION).tar.gz -o $(PYLIB_SRC_EXT)/PyYAML-$(PYYAML_VERSION).tar.gz
-	# Download PyGreSQL using curl
-	curl -sSL https://files.pythonhosted.org/packages/source/P/PyGreSQL/PyGreSQL-$(PYGRESQL_VERSION).tar.gz -o $(PYLIB_SRC_EXT)/PyGreSQL-$(PYGRESQL_VERSION).tar.gz
+	# Download psutil using curl (only if not exists)
+	@if [ ! -f $(PYLIB_SRC_EXT)/psutil-$(PSUTIL_VERSION).tar.gz ]; then \
+		echo "Downloading psutil-$(PSUTIL_VERSION).tar.gz..."; \
+		curl -sSL https://files.pythonhosted.org/packages/source/p/psutil/psutil-$(PSUTIL_VERSION).tar.gz -o $(PYLIB_SRC_EXT)/psutil-$(PSUTIL_VERSION).tar.gz; \
+	else \
+		echo "psutil-$(PSUTIL_VERSION).tar.gz already exists, skipping download"; \
+	fi
+	# Download PyYAML using curl (only if not exists)
+	@if [ ! -f $(PYLIB_SRC_EXT)/PyYAML-$(PYYAML_VERSION).tar.gz ]; then \
+		echo "Downloading PyYAML-$(PYYAML_VERSION).tar.gz..."; \
+		curl -sSL https://files.pythonhosted.org/packages/source/P/PyYAML/PyYAML-$(PYYAML_VERSION).tar.gz -o $(PYLIB_SRC_EXT)/PyYAML-$(PYYAML_VERSION).tar.gz; \
+	else \
+		echo "PyYAML-$(PYYAML_VERSION).tar.gz already exists, skipping download"; \
+	fi
+	# Download PyGreSQL using curl (only if not exists)
+	@if [ ! -f $(PYLIB_SRC_EXT)/PyGreSQL-$(PYGRESQL_VERSION).tar.gz ]; then \
+		echo "Downloading PyGreSQL-$(PYGRESQL_VERSION).tar.gz..."; \
+		curl -sSL https://files.pythonhosted.org/packages/source/P/PyGreSQL/PyGreSQL-$(PYGRESQL_VERSION).tar.gz -o $(PYLIB_SRC_EXT)/PyGreSQL-$(PYGRESQL_VERSION).tar.gz; \
+	else \
+		echo "PyGreSQL-$(PYGRESQL_VERSION).tar.gz already exists, skipping download"; \
+	fi
 	# Install wheel and cython for PyYAML building
 	pip3 install --user wheel "cython<3.0.0"
@@ -150,7 +165,12 @@ $(MOCK_BIN):
 		pip3 install mock;\
 	else\
 		mkdir -p $(PYLIB_SRC_EXT) && \
-		curl -sSL https://files.pythonhosted.org/packages/source/m/mock/mock-$(MOCK_VERSION).zip -o $(PYLIB_SRC_EXT)/mock-$(MOCK_VERSION).zip && \
+		if [ ! -f $(PYLIB_SRC_EXT)/mock-$(MOCK_VERSION).zip ]; then \
+			echo "Downloading mock-$(MOCK_VERSION).zip..."; \
+			curl -sSL https://files.pythonhosted.org/packages/source/m/mock/mock-$(MOCK_VERSION).zip -o $(PYLIB_SRC_EXT)/mock-$(MOCK_VERSION).zip; \
+		else \
+			echo "mock-$(MOCK_VERSION).zip already exists, skipping download"; \
+		fi && \
 		mkdir -p $(PYTHONSRC_INSTALL_SITE) && \
 		cd $(PYLIB_SRC_EXT)/ && unzip -q $(MOCK_DIR).zip && \
 		cd $(PYLIB_SRC_EXT)/$(MOCK_DIR)/ && \
diff --git a/gpMgmt/bin/gpcheckresgroupv2impl b/gpMgmt/bin/gpcheckresgroupv2impl
index 4e15d048eb1..b71d2562628 100755
--- a/gpMgmt/bin/gpcheckresgroupv2impl
+++ b/gpMgmt/bin/gpcheckresgroupv2impl
@@ -2,8 +2,17 @@
 # Copyright (c) 2017, VMware, Inc. or its affiliates.
 
 import os
+import sys
 from functools import reduce
 
+# Import the database connection modules from gppylib; fail with a clear
+# message if the environment has not been sourced
+try:
+    from gppylib.db import dbconn
+    from pg import DatabaseError
+except ImportError as err:
+    sys.exit('Cannot import modules. Please check that you have sourced '
+             'cloudberry-env.sh. Detail: ' + str(err))
+
 
 class ValidationException(Exception):
     def __init__(self, message):
@@ -29,6 +38,7 @@ class CgroupValidationVersionTwo(CgroupValidation):
     def __init__(self):
         self.mount_point = self.detect_cgroup_mount_point()
         self.tab = {"r": os.R_OK, "w": os.W_OK, "x": os.X_OK, "f": os.F_OK}
+        self.cgroup_parent = self.get_cgroup_parent()
 
     def validate_all(self):
         """
@@ -43,23 +53,46 @@ class CgroupValidationVersionTwo(CgroupValidation):
 
         self.validate_permission("cgroup.procs", "rw")
 
-        self.validate_permission("gpdb/", "rwx")
-        self.validate_permission("gpdb/cgroup.procs", "rw")
+        self.validate_permission(self.cgroup_parent + "/", "rwx")
+        self.validate_permission(self.cgroup_parent + "/cgroup.procs", "rw")
+
+        self.validate_permission(self.cgroup_parent + "/cpu.max", "rw")
+        self.validate_permission(self.cgroup_parent + "/cpu.weight", "rw")
+        self.validate_permission(self.cgroup_parent + "/cpu.weight.nice", "rw")
+        self.validate_permission(self.cgroup_parent + "/cpu.stat", "r")
 
-        self.validate_permission("gpdb/cpu.max", "rw")
-        self.validate_permission("gpdb/cpu.weight", "rw")
-        self.validate_permission("gpdb/cpu.weight.nice", "rw")
-        self.validate_permission("gpdb/cpu.stat", "r")
+        self.validate_permission(self.cgroup_parent + "/cpuset.cpus", "rw")
+        self.validate_permission(self.cgroup_parent + "/cpuset.cpus.partition", "rw")
+        self.validate_permission(self.cgroup_parent + "/cpuset.mems", "rw")
+        self.validate_permission(self.cgroup_parent + "/cpuset.cpus.effective", "r")
+        self.validate_permission(self.cgroup_parent + "/cpuset.mems.effective", "r")
 
-        self.validate_permission("gpdb/cpuset.cpus", "rw")
-        self.validate_permission("gpdb/cpuset.cpus.partition", "rw")
-        self.validate_permission("gpdb/cpuset.mems", "rw")
-        self.validate_permission("gpdb/cpuset.cpus.effective", "r")
-        self.validate_permission("gpdb/cpuset.mems.effective", "r")
+        self.validate_permission(self.cgroup_parent + "/memory.current", "r")
 
-        self.validate_permission("gpdb/memory.current", "r")
+        self.validate_permission(self.cgroup_parent + "/io.max", "rw")
 
-        self.validate_permission("gpdb/io.max", "rw")
+    def get_cgroup_parent(self):
+        """
+        Get the cgroup parent directory from the database GUC parameter
+        gp_resource_group_cgroup_parent. If unable to connect to the database
+        or retrieve the parameter, report the error using the die function.
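+
+        Example (illustrative): with the GUC set to 'gpdb' and cgroup v2
+        mounted at /sys/fs/cgroup, validate_all() ends up checking paths
+        such as /sys/fs/cgroup/gpdb/cpu.max.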
+ """ + try: + dburl = dbconn.DbURL() + + with dbconn.connect(dburl, utility=True) as conn: + # Query the GUC parameter value + sql = "SHOW gp_resource_group_cgroup_parent" + cursor = dbconn.query(conn, sql) + result = cursor.fetchone() + + if result and result[0]: + return result[0] + else: + self.die("failed to retrieve gp_resource_group_cgroup_parent parameter from database") + + except Exception as e: + self.die("failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e))) def die(self, msg): raise ValidationException("cgroup is not properly configured: {}".format(msg)) diff --git a/gpMgmt/bin/gpconfig b/gpMgmt/bin/gpconfig index 3d5495bd01b..f4e3ce7c62d 100755 --- a/gpMgmt/bin/gpconfig +++ b/gpMgmt/bin/gpconfig @@ -15,6 +15,7 @@ import os import sys import re +import psutil try: from gppylib.gpparseopts import OptParser, OptChecker @@ -170,6 +171,20 @@ class Guc: return msg elif newval != "'queue'": return "the value of gp_resource_manager must be 'group' or 'group-v2' or 'queue'" + elif self.name == "gp_resource_group_cgroup_parent": + base = newval.strip("'") + if not re.match("^[0-9a-zA-Z][-._0-9a-zA-Z]*$", base): + return "resource group cgroup parent can only contains alphabet, number, and non-leading . _ -" + + path = None + for partition in psutil.disk_partitions(all=True): + if partition.fstype == "cgroup2": + path = partition.mountpoint + "/" + base + break + if path is None: + return "cannot find cgroup v2 mountpoint" + if not os.path.isdir(path): + return "'%s' doesn't exists or is not a directory" % path elif self.name == 'unix_socket_permissions': if newval[0] != '0': diff --git a/pom.xml b/pom.xml index 97ebb23bb70..745565143d3 100644 --- a/pom.xml +++ b/pom.xml @@ -1241,6 +1241,11 @@ code or new licensing patterns. aclocal.m4 python-dependencies.txt + + + devops/deploy/docker/build/rocky8/tests/requirements.txt + devops/deploy/docker/build/rocky9/tests/requirements.txt