diff --git a/CHANGELOG.md b/CHANGELOG.md index e92bde7b..90e5e685 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ These are changes that will probably be included in the next release. * Include pgvectorscale * Include pgai +* Include pg_lake ## [v1.7.1] - 2023-04-27 diff --git a/Dockerfile b/Dockerfile index 9234b26f..e270e364 100644 --- a/Dockerfile +++ b/Dockerfile @@ -150,7 +150,7 @@ RUN python3 -m pip install uv # We install some build dependencies and mark the installed packages as auto-installed, # this will cause the cleanup to get rid of all of these packages -ENV BUILD_PACKAGES="binutils cmake devscripts equivs gcc git gpg gpg-agent libc-dev libc6-dev libkrb5-dev libperl-dev libssl-dev lsb-release make patchutils python2-dev python3-dev wget libsodium-dev" +ENV BUILD_PACKAGES="binutils cmake devscripts equivs gcc git gpg gpg-agent libc-dev libc6-dev libkrb5-dev libperl-dev libssl-dev lsb-release make patchutils python2-dev python3-dev wget libsodium-dev ninja-build libgeos-dev libproj-dev libgdal-dev openjdk-21-jdk libjansson-dev" RUN apt-get install -y ${BUILD_PACKAGES} RUN apt-mark auto ${BUILD_PACKAGES} @@ -207,6 +207,9 @@ RUN apt-get install -y pgxnclient ## Add pgsodium extension depedencies RUN apt-get install -y libsodium23 +## Add pg_lake runtime dependencies +RUN apt-get install -y libjansson4 + RUN for pg in ${PG_VERSIONS}; do \ for pkg in pg_uuidv7 pgsodium; do \ PATH="/usr/lib/postgresql/${pg}/bin:$PATH" pgxnclient install --pg_config "/usr/lib/postgresql/${pg}/bin/pg_config" "$pkg"; \ @@ -299,6 +302,8 @@ RUN for pg in ${PG_VERSIONS}; do \ RUN for file in $(find /usr/share/postgresql -name 'postgresql.conf.sample'); do \ # We want timescaledb to be loaded in this image by every created cluster sed -r -i "s/[#]*\s*(shared_preload_libraries)\s*=\s*'(.*)'/\1 = 'timescaledb,\2'/;s/,'/'/" $file \ + # Add pg_extension_base for pg_lake support + && sed -r -i "s/(shared_preload_libraries\s*=\s*'[^']*)/\1,pg_extension_base/" $file \ # We 
need to listen on all interfaces, otherwise PostgreSQL is not accessible && echo "listen_addresses = '*'" >> $file; \ done @@ -310,6 +315,15 @@ RUN mkdir -p /usr/lib/debug; \ chgrp -R postgres /usr/lib/debug; \ chmod -R g+w /usr/lib/debug +# required for pg_lake to install Avro libraries +RUN set -ex; \ + for libdir in /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu; do \ + if [ -d "$libdir" ]; then \ + chgrp -R postgres "$libdir"; \ + chmod -R g+w "$libdir"; \ + fi; \ + done + ## Prepare pgai, needs a separate directory RUN install -o postgres -g postgres -m 0750 -d /usr/local/lib/pgai @@ -402,6 +416,11 @@ RUN OSS_ONLY="${OSS_ONLY}" \ PGVECTORSCALE_VERSIONS="${PGVECTORSCALE_VERSIONS}" \ /build/scripts/install_extensions pgvectorscale +ARG PG_LAKE_VERSIONS +RUN OSS_ONLY="${OSS_ONLY}" \ + PG_LAKE_VERSIONS="${PG_LAKE_VERSIONS}" \ + /build/scripts/install_extensions pg_lake + USER root # All the tools that were built in the previous steps have their ownership set to postgres @@ -492,6 +511,7 @@ RUN /build/scripts/install_extensions versions > /.image_config; \ echo "PGBACKREST_EXPORTER_VERSION=\"${PGBACKREST_EXPORTER_VERSION}\"" >> /.image_config; \ echo "PGAI_VERSION=\"${PGAI_VERSION}\"" >> /.image_config; \ echo "PGVECTORSCALE_VERSIONS=\"${PGVECTORSCALE_VERSIONS}\"" >> /.image_config; \ + echo "PG_LAKE_VERSIONS=\"${PG_LAKE_VERSIONS}\"" >> /.image_config; \ echo "PG_MAJOR=\"${PG_MAJOR}\"" >> /.image_config; \ echo "PG_VERSIONS=\"${PG_VERSIONS}\"" >> /.image_config; \ echo "FROM=\"${DOCKER_FROM}\"" >> /.image_config; \ @@ -515,7 +535,7 @@ FROM builder AS trimmed USER root -ENV BUILD_PACKAGES="binutils cmake devscripts equivs gcc git gpg gpg-agent libc-dev libc6-dev libkrb5-dev libperl-dev libssl-dev lsb-release make patchutils python2-dev python3-dev wget libsodium-dev" +ENV BUILD_PACKAGES="binutils cmake devscripts equivs gcc git gpg gpg-agent libc-dev libc6-dev libkrb5-dev libperl-dev libssl-dev lsb-release make patchutils python2-dev python3-dev wget 
libsodium-dev ninja-build libgeos-dev libproj-dev libgdal-dev openjdk-21-jdk libjansson-dev" RUN set -ex; \ apt-get purge -y ${BUILD_PACKAGES}; \ diff --git a/Makefile b/Makefile index f3470389..454a02f8 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ PG_VERSIONS?= # Additional PostgreSQL extensions we want to include with specific version/commit tags PGAI_VERSION?=extension-0.11.2 PGVECTORSCALE_VERSIONS?=all +PG_LAKE_VERSIONS?=all POSTGIS_VERSIONS?=3 PG_AUTH_MON?=v3.0 PG_STAT_MONITOR?=2.2.0 @@ -152,6 +153,7 @@ DOCKER_BUILD_COMMAND=docker build \ --build-arg INSTALL_METHOD="$(INSTALL_METHOD)" \ --build-arg PGAI_VERSION="$(PGAI_VERSION)" \ --build-arg PGVECTORSCALE_VERSIONS="$(PGVECTORSCALE_VERSIONS)" \ + --build-arg PG_LAKE_VERSIONS="$(PG_LAKE_VERSIONS)" \ --build-arg PG_AUTH_MON="$(PG_AUTH_MON)" \ --build-arg PG_LOGERRORS="$(PG_LOGERRORS)" \ --build-arg PG_MAJOR=$(PG_MAJOR) \ @@ -188,6 +190,7 @@ fast: PG_VERSIONS=17 fast: POSTGIS_VERSIONS= fast: TOOLKIT_VERSIONS= fast: PGVECTORSCALE_VERSIONS= +fast: PG_LAKE_VERSIONS= fast: build .PHONY: latest @@ -195,6 +198,7 @@ latest: ALL_VERSIONS=false latest: TIMESCALEDB_VERSIONS=latest latest: TOOLKIT_VERSIONS=latest latest: PGVECTORSCALE_VERSIONS=latest +latest: PG_LAKE_VERSIONS=latest latest: build prune: # docker system prune -af diff --git a/README.md b/README.md index 682d663d..eae24e82 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ POSTGIS_VERSIONS="" make build For further environment variables that can be set, we point you to the [Makefile](Makefile) itself. 
# Build and install the pg_lake extension components at one git ref.
#
# Globals:   DRYRUN (read) - when "true", only the log lines are emitted and
#                            nothing is cloned or built
# Arguments: $1 - pg_lake tag, branch or commit to check out
# Outputs:   progress messages through log; git/make output from the build
#
# The build is attempted once for every PostgreSQL version reported by
# available_pg_versions. A version for which supported_pg_lake prints a
# non-empty reason is logged and skipped. The source tree is cloned into
# /build/pg_lake on first use and reused by every later invocation.
install_pg_lake() {
    local version="$1" pg pkg=pg_lake unsupported_reason

    for pg in $(available_pg_versions); do
        unsupported_reason="$(supported_pg_lake "$pg" "$version")"
        if [ -n "$unsupported_reason" ]; then
            log "$pkg-$version: $unsupported_reason"
            continue
        fi

        log "building $pkg-$version for pg$pg"

        [[ "${DRYRUN:-}" = true ]] && continue

        # Subshell: keeps the cd and the "set -ex" from leaking into the caller.
        (
            set -ex
            local component

            # Clone the pg_lake repository on first use
            if [ ! -d /build/pg_lake ]; then
                cd /build
                git clone https://github.com/Snowflake-Labs/pg_lake
            fi

            cd /build/pg_lake

            # Checkout the requested version (tag, branch, or commit).
            # --force throws away the include-path patch applied below by a
            # previous iteration; without it, switching to another ref (or
            # pulling) can abort because of the locally modified file.
            git fetch --all --tags
            git checkout --force "$version"
            if [[ "$version" = main || "$version" = master ]]; then
                git pull
            fi

            # Fix incorrect include path in pg_lake_table (upstream bug)
            # PostgreSQL headers should be included without "server/" prefix
            if [ -f pg_lake_table/src/planner/query_pushdown.c ]; then
                sed -i 's|#include "server/rewrite/rewriteManip.h"|#include "rewrite/rewriteManip.h"|g' pg_lake_table/src/planner/query_pushdown.c
            fi

            # Build against the current PostgreSQL version by putting its bin
            # directory first in PATH for every make invocation.
            # "make clean" is best-effort: a failure here is deliberately ignored.
            PATH="/usr/lib/postgresql/${pg}/bin:${PATH}" make clean || true

            # The components are installed one by one, in this fixed order.
            for component in \
                pg_extension_base \
                pg_map \
                pg_extension_updater \
                pg_lake_engine \
                avro \
                pg_lake_iceberg \
                pg_lake_table \
                pg_lake_spatial \
                pg_lake_copy \
                pg_lake; do
                PATH="/usr/lib/postgresql/${pg}/bin:${PATH}" make "install-${component}"
            done
        )
    done
}
"$pg" "$ver" } +supported_pg_lake() { + local pg="$1" ver="$2" + + # pg_lake requires PostgreSQL 16+ (uses APIs introduced in PG16) + if [ "$pg" -lt 16 ]; then + echo "pg_lake requires PostgreSQL 16 or later (PG$pg not supported)" + return + fi + + # just attempt the build for main/master/or other branch build + if [[ "$ver" = main || "$ver" = master || "$ver" =~ [a-z_-]*/[A-Za-z0-9_-]* ]]; then + return + fi + + version_is_supported pg_lake "$pg" "$ver" +} + require_supported_arch() { if [[ "$ARCH" != amd64 && "$ARCH" != aarch64 ]]; then echo "unsupported architecture: $ARCH" >&2 @@ -236,3 +253,4 @@ require_supported_arch() { TIMESCALEDB_VERSIONS="$(requested_pkg_versions timescaledb "$TIMESCALEDB_VERSIONS")" TOOLKIT_VERSIONS="$(requested_pkg_versions toolkit "$TOOLKIT_VERSIONS")" PGVECTORSCALE_VERSIONS="$(requested_pkg_versions pgvectorscale "$PGVECTORSCALE_VERSIONS")" +PG_LAKE_VERSIONS="$(requested_pkg_versions pg_lake "$PG_LAKE_VERSIONS")" diff --git a/build_scripts/versions.yaml b/build_scripts/versions.yaml index b249c4dd..83a361ff 100644 --- a/build_scripts/versions.yaml +++ b/build_scripts/versions.yaml @@ -184,3 +184,8 @@ pgvectorscale: pg-max: 17 0.8.0: pg-max: 17 + +pg_lake: + main: + pg-min: 16 + pg-max: 18 diff --git a/cicd/shared.sh b/cicd/shared.sh index 4d45b0fe..328d8b7e 100644 --- a/cicd/shared.sh +++ b/cicd/shared.sh @@ -61,6 +61,7 @@ check_base_components() { check_timescaledb "$pg" "$lib" check_pgvectorscale "$pg" "$lib" + check_pg_lake "$pg" "$lib" check_toolkit "$pg" "$lib" check_oss_extensions "$pg" "$lib" check_others "$pg" "$lib" @@ -188,6 +189,47 @@ check_pgvectorscale() { if [[ "$found" = false && "$pg" -le 17 ]]; then error "no pgvectorscale versions found for pg$pg"; fi } +check_pg_lake() { + if [ -z "$PG_LAKE_VERSIONS" ]; then return; fi + local pg="$1" lib="$2" found=false + + # record an empty version so we'll get an empty table row if we don't have any versions + record_ext_version pg_lake "$pg" "" + + for ver in $PG_LAKE_VERSIONS; 
# Check that pg_lake was installed for one PostgreSQL version and record the
# requested versions for the report.
#
# Globals:   PG_LAKE_VERSIONS (read) - whitespace-separated list of requested
#                                      versions; empty/unset disables the check
# Arguments: $1 - PostgreSQL major version
#            $2 - library directory expected to contain pg_lake.so
# Outputs:   messages through log / error; versions through record_ext_version
#
# Only the presence of a non-empty "$lib/pg_lake.so" is tested, so when the
# library exists every requested version is recorded as found.
check_pg_lake() {
    if [ -z "${PG_LAKE_VERSIONS:-}" ]; then return; fi
    # ver and unsupported_reason are local so they do not leak into (or
    # clobber) variables of the script that sourced this file.
    local pg="$1" lib="$2" found=false ver unsupported_reason

    # record an empty version so we'll get an empty table row if we don't have any versions
    record_ext_version pg_lake "$pg" ""

    for ver in $PG_LAKE_VERSIONS; do
        # Check for the main pg_lake .so file
        if [ -s "$lib/pg_lake.so" ]; then
            found=true
            record_ext_version pg_lake "$pg" "$ver"
            continue
        fi

        # Not installed: find out whether that was expected.
        unsupported_reason="$(supported_pg_lake "$pg" "$ver")"
        if [ -n "$unsupported_reason" ]; then
            log "skipped: pg_lake-$ver: $unsupported_reason"
        elif [[ "$ver" = master || "$ver" = main ]]; then
            # A missing main/master build is only logged here, not reported
            # as an error for that version.
            log "pg_lake-$ver not built for pg$pg (skipping version check for main/master)"
        else
            error "pg_lake-$ver not found for pg$pg"
        fi
    done

    # NOTE(review): the 16..18 range is hard-coded; presumably it must be kept
    # in sync with the pg-min/pg-max of pg_lake in versions.yaml - confirm.
    if [[ "$found" = false && "$pg" -ge 16 && "$pg" -le 18 ]]; then error "no pg_lake versions found for pg$pg"; fi
}
string_agg(version, ',' ORDER BY version)