36 changes: 25 additions & 11 deletions .github/workflows/r.yml
@@ -41,6 +41,9 @@ on:
 
 env:
   MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
+  JAI_CORE_VERSION: '1.1.3'
+  JAI_CODEC_VERSION: '1.1.3'
+  JAI_IMAGEIO_VERSION: '1.1'
   DO_NOT_TRACK: true
 
 concurrency:
@@ -109,24 +112,34 @@ jobs:
           distribution: 'temurin'
           java-version: '11'
           cache: 'maven'
-      - name: Get OS name
-        id: os-name
-        run: |
-          # `os_name` will be like "Ubuntu-20.04.1-LTS"
-          OS_NAME=$(lsb_release -ds | sed 's/\s/-/g')
-          echo "os-name=$OS_NAME" >> $GITHUB_OUTPUT
-      - name: Cache Spark installations
-        if: runner.os != 'Windows'
-        uses: actions/cache@master
+      - uses: actions/setup-python@v5
         with:
-          path: ~/spark
-          key: apache.sedona-apache-spark-${{ steps.os-name.outputs.os-name }}-${{ env.SPARK_VERSION }}
+          python-version: '3.11'
+      - name: Install PySpark
+        run: |
+          pip3 install pyspark==${SPARK_VERSION}
+      - name: Download JAI libraries
+        run: |
+          PYSPARK_PATH=$(python3 -c "import pyspark; print(pyspark.__path__[0])")
+          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
+          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
+          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
+          mv -v jai_core-${JAI_CORE_VERSION}.jar ${PYSPARK_PATH}/jars
+          mv -v jai_codec-${JAI_CODEC_VERSION}.jar ${PYSPARK_PATH}/jars
+          mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar ${PYSPARK_PATH}/jars
+          echo "PYSPARK_PATH=${PYSPARK_PATH}" >> $GITHUB_ENV
       - name: Build Sedona libraries
         run: |
           SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
           mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
+      - name: Copy Sedona JARs to PySpark
+        run: |
+          find spark-shaded/target -name sedona-*.jar -exec cp {} ${PYSPARK_PATH}/jars/ \;
       - name: Run tests
         run: |
+          # Set SPARK_HOME to PySpark path
+          export SPARK_HOME=${PYSPARK_PATH}
+
           if [[ "${SPARK_VERSION:0:3}" < "3.3" ]]; then
             case "$HADOOP_VERSION" in
               3)
@@ -143,6 +156,7 @@ jobs:
           cd ./R/tests
           NOT_CRAN='true' Rscript testthat.R
         shell: bash
+        timeout-minutes: 30
       - uses: actions/upload-artifact@v4
         if: failure()
         with:
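Taken together, the new steps replace sparklyr's per-run Spark download with the Spark distribution bundled inside the PySpark wheel, with the three JAI jars grafted into its jars/ directory. Below is a minimal sketch of the same setup on a local machine, assuming Python 3 with pip on PATH and the OSGeo repository URLs from the workflow; the SPARK_VERSION default is an illustrative placeholder, not something this PR pins:

#!/usr/bin/env bash
# Sketch: mirror the workflow's Spark setup locally.
set -euo pipefail

SPARK_VERSION="${SPARK_VERSION:-3.4.1}"  # placeholder; use the matrix version under test
JAI_CORE_VERSION='1.1.3'
JAI_CODEC_VERSION='1.1.3'
JAI_IMAGEIO_VERSION='1.1'

pip3 install "pyspark==${SPARK_VERSION}"

# The PySpark package directory contains a full Spark distribution,
# so it can double as SPARK_HOME.
PYSPARK_PATH=$(python3 -c "import pyspark; print(pyspark.__path__[0])")

# Fetch the JAI jars from OSGeo into PySpark's jars directory, as the
# "Download JAI libraries" step does.
for path in "jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar" \
            "jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar" \
            "jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar"; do
  wget -q -P "${PYSPARK_PATH}/jars" "https://repo.osgeo.org/repository/release/javax/media/${path}"
done

export SPARK_HOME="${PYSPARK_PATH}"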
27 changes: 21 additions & 6 deletions R/tests/testthat/helper-initialize.R
@@ -20,9 +20,16 @@ testthat_spark_connection <- function(conn_retry_interval_s = 2) {
   if (!exists(conn_key, envir = .GlobalEnv)) {
     version <- Sys.getenv("SPARK_VERSION")
     hadoop_version <- Sys.getenv("HADOOP_VERSION")
-    spark_installed <- spark_installed_versions()
-    if (nrow(spark_installed[spark_installed$spark == version & spark_installed$hadoop == hadoop_version, ]) == 0) {
-      spark_install(version, hadoop_version)
+    spark_home <- Sys.getenv("SPARK_HOME")
+
+    if (spark_home != "") {
+      message(sprintf("Using pre-installed Spark from: %s", spark_home))
+    } else {
+      spark_installed <- spark_installed_versions()
+      if (nrow(spark_installed[spark_installed$spark == version & spark_installed$hadoop == hadoop_version, ]) == 0) {
+        message("Installing Spark for local development...")
+        spark_install(version, hadoop_version)
+      }
     }
 
     conn_attempts <- 3
@@ -33,13 +40,21 @@ testthat_spark_connection <- function(conn_retry_interval_s = 2) {
       config <- spark_config()
       config[["sparklyr.connect.timeout"]] <- 300
 
-      sc <- spark_connect(
+      # Use spark_home if set (CI), otherwise use version (local dev)
+      connect_args <- list(
         master = "local",
         method = "shell",
         config = config,
-        app_name = paste0("testthat-", uuid::UUIDgenerate()),
-        version = version
+        app_name = paste0("testthat-", uuid::UUIDgenerate())
       )
+
+      if (spark_home != "") {
+        connect_args$spark_home <- spark_home
+      } else {
+        connect_args$version <- version
+      }
+
+      sc <- do.call(spark_connect, connect_args)
       assign(conn_key, sc, envir = .GlobalEnv)
       TRUE
     },
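Because the helper now branches on SPARK_HOME before building the spark_connect() arguments, the same testthat entry point serves both CI and local development. A usage sketch under the same assumptions as above (the version values are placeholders, not pinned by this PR):

# CI mode: reuse PySpark's bundled Spark, skipping the spark_install download.
export SPARK_HOME=$(python3 -c "import pyspark; print(pyspark.__path__[0])")
cd ./R/tests
NOT_CRAN='true' Rscript testthat.R

# Local-dev mode: unset SPARK_HOME and let sparklyr install the requested version.
unset SPARK_HOME
export SPARK_VERSION=3.4.1  # placeholder
export HADOOP_VERSION=3     # placeholder
NOT_CRAN='true' Rscript testthat.R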