diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 55951c4036c..236b30e7e35 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -41,6 +41,9 @@ on:
 
 env:
   MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
+  JAI_CORE_VERSION: '1.1.3'
+  JAI_CODEC_VERSION: '1.1.3'
+  JAI_IMAGEIO_VERSION: '1.1'
   DO_NOT_TRACK: true
 
 concurrency:
@@ -109,24 +112,34 @@ jobs:
           distribution: 'temurin'
           java-version: '11'
           cache: 'maven'
-      - name: Get OS name
-        id: os-name
-        run: |
-          # `os_name` will be like "Ubuntu-20.04.1-LTS"
-          OS_NAME=$(lsb_release -ds | sed 's/\s/-/g')
-          echo "os-name=$OS_NAME" >> $GITHUB_OUTPUT
-      - name: Cache Spark installations
-        if: runner.os != 'Windows'
-        uses: actions/cache@master
+      - uses: actions/setup-python@v5
         with:
-          path: ~/spark
-          key: apache.sedona-apache-spark-${{ steps.os-name.outputs.os-name }}-${{ env.SPARK_VERSION }}
+          python-version: '3.11'
+      - name: Install PySpark
+        run: |
+          pip3 install pyspark==${SPARK_VERSION}
+      - name: Download JAI libraries
+        run: |
+          PYSPARK_PATH=$(python3 -c "import pyspark; print(pyspark.__path__[0])")
+          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
+          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
+          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
+          mv -v jai_core-${JAI_CORE_VERSION}.jar ${PYSPARK_PATH}/jars
+          mv -v jai_codec-${JAI_CODEC_VERSION}.jar ${PYSPARK_PATH}/jars
+          mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar ${PYSPARK_PATH}/jars
+          echo "PYSPARK_PATH=${PYSPARK_PATH}" >> $GITHUB_ENV
       - name: Build Sedona libraries
         run: |
           SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
           mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
+      - name: Copy Sedona JARs to PySpark
+        run: |
+          find spark-shaded/target -name sedona-*.jar -exec cp {} ${PYSPARK_PATH}/jars/ \;
       - name: Run tests
         run: |
+          # Set SPARK_HOME to PySpark path
+          export SPARK_HOME=${PYSPARK_PATH}
+
           if [[ "${SPARK_VERSION:0:3}" < "3.3" ]]; then
             case "$HADOOP_VERSION" in
               3)
@@ -143,6 +156,7 @@ jobs:
           cd ./R/tests
           NOT_CRAN='true' Rscript testthat.R
         shell: bash
+        timeout-minutes: 30
       - uses: actions/upload-artifact@v4
         if: failure()
         with:
diff --git a/R/tests/testthat/helper-initialize.R b/R/tests/testthat/helper-initialize.R
index 84cadbff017..9d1143d93f6 100644
--- a/R/tests/testthat/helper-initialize.R
+++ b/R/tests/testthat/helper-initialize.R
@@ -20,9 +20,16 @@ testthat_spark_connection <- function(conn_retry_interval_s = 2) {
   if (!exists(conn_key, envir = .GlobalEnv)) {
     version <- Sys.getenv("SPARK_VERSION")
     hadoop_version <- Sys.getenv("HADOOP_VERSION")
-    spark_installed <- spark_installed_versions()
-    if (nrow(spark_installed[spark_installed$spark == version & spark_installed$hadoop == hadoop_version, ]) == 0) {
-      spark_install(version, hadoop_version)
+    spark_home <- Sys.getenv("SPARK_HOME")
+
+    if (spark_home != "") {
+      message(sprintf("Using pre-installed Spark from: %s", spark_home))
+    } else {
+      spark_installed <- spark_installed_versions()
+      if (nrow(spark_installed[spark_installed$spark == version & spark_installed$hadoop == hadoop_version, ]) == 0) {
+        message("Installing Spark for local development...")
+        spark_install(version, hadoop_version)
+      }
     }
 
     conn_attempts <- 3
@@ -33,13 +40,21 @@ testthat_spark_connection <- function(conn_retry_interval_s = 2) {
       config <- spark_config()
       config[["sparklyr.connect.timeout"]] <- 300
 
-      sc <- spark_connect(
+      # Use spark_home if set (CI), otherwise use version (local dev)
+      connect_args <- list(
         master = "local",
         method = "shell",
         config = config,
-        app_name = paste0("testthat-", uuid::UUIDgenerate()),
-        version = version
+        app_name = paste0("testthat-", uuid::UUIDgenerate())
       )
+
+      if (spark_home != "") {
+        connect_args$spark_home <- spark_home
+      } else {
+        connect_args$version <- version
+      }
+
+      sc <- do.call(spark_connect, connect_args)
 
       assign(conn_key, sc, envir = .GlobalEnv)
       TRUE
     },