# Patch summary (explanatory preamble placed before the first "diff --git" header).
#
# Files changed:
#   1. .github/workflows/pyspark.yml
#        The "Install Poetry" step runs `python3 -m pip install poetry --quiet`
#        without the `sudo` prefix it had before.
#   2. maven-projects/java/.../graphinfo/GraphInfoTest.java
#        `root` is now built from a new private static resolveTestData() plus "/java"
#        instead of reading the GAR_TEST_DATA environment variable directly.
#   3. maven-projects/spark/graphar/.../BaseTestSuite.scala
#        beforeAll() assigns `testData` from a local resolveTestData() instead of
#        throwing IllegalArgumentException when GAR_TEST_DATA is unset.
#
# Lookup order shared by both resolveTestData implementations:
#   a. environment variable GAR_TEST_DATA
#   b. system property gar.test.data
#   c. the first of "../../testing", "../testing", "testing" (made absolute with
#      getAbsoluteFile) that contains ldbc_sample/csv/ldbc_sample.graph.yml
# A RuntimeException is thrown when no location passes the marker-file check.
#
# NOTE(review): the Java version always returns baseDir.getAbsolutePath(); the Scala
#   version returns a value taken from the environment variable or system property
#   unchanged once it validates. Confirm whether callers need an absolute path.
# NOTE(review): the Scala version uses `return` inside the `candidates.foreach { ... }`
#   closure; a `find`-based lookup would avoid returning from inside a closure.
# NOTE(review): the Java version contains the same long error message twice; the second
#   check also covers values supplied via the environment variable or system property.
# NOTE(review): in this copy the patch text sits on three physical lines although the
#   hunk headers describe multi-line hunks (e.g. "-32,7 +32,36"); presumably the
#   original line breaks were lost and this copy will not apply as-is. Confirm against
#   the original patch.
diff --git a/.github/workflows/pyspark.yml b/.github/workflows/pyspark.yml index 4744c58af..3ceda7822 100644 --- a/.github/workflows/pyspark.yml +++ b/.github/workflows/pyspark.yml @@ -55,7 +55,7 @@ jobs: - name: Install Poetry working-directory: pyspark run: | - yes | sudo python3 -m pip install poetry --quiet + yes | python3 -m pip install poetry --quiet poetry env use python3 - name: Lint diff --git a/maven-projects/java/src/test/java/org/apache/graphar/graphinfo/GraphInfoTest.java b/maven-projects/java/src/test/java/org/apache/graphar/graphinfo/GraphInfoTest.java index 07b111c13..4b84b15ae 100644 --- a/maven-projects/java/src/test/java/org/apache/graphar/graphinfo/GraphInfoTest.java +++ b/maven-projects/java/src/test/java/org/apache/graphar/graphinfo/GraphInfoTest.java @@ -32,7 +32,36 @@ import org.junit.Test; public class GraphInfoTest { - public static final String root = System.getenv("GAR_TEST_DATA") + "/java"; + private static String resolveTestData() { + String path = System.getenv("GAR_TEST_DATA"); + if (path == null) { + path = System.getProperty("gar.test.data"); + } + if (path == null) { + String[] candidates = {"../../testing", "../testing", "testing"}; + for (String p : candidates) { + java.io.File dir = new java.io.File(p).getAbsoluteFile(); + if (new java.io.File(dir, "ldbc_sample/csv/ldbc_sample.graph.yml").exists()) { + path = dir.getAbsolutePath(); + break; + } + } + if (path == null) { + throw new RuntimeException( + "GAR_TEST_DATA not found or invalid. Please set GAR_TEST_DATA environment variable to point to the testing directory or ensure the testing directory exists with ldbc_sample/csv/ldbc_sample.graph.yml"); + } + } + java.io.File baseDir = new java.io.File(path).getAbsoluteFile(); + java.io.File markerFile = + new java.io.File(baseDir, "ldbc_sample/csv/ldbc_sample.graph.yml"); + if (!baseDir.isDirectory() || !markerFile.exists()) { + throw new RuntimeException( + "GAR_TEST_DATA not found or invalid. 
Please set GAR_TEST_DATA environment variable to point to the testing directory or ensure the testing directory exists with ldbc_sample/csv/ldbc_sample.graph.yml"); + } + return baseDir.getAbsolutePath(); + } + + public static final String root = resolveTestData() + "/java"; @Test public void test1() { diff --git a/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/BaseTestSuite.scala b/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/BaseTestSuite.scala index 2545d7ed0..4e3232eeb 100644 --- a/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/BaseTestSuite.scala +++ b/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/BaseTestSuite.scala @@ -29,10 +29,42 @@ abstract class BaseTestSuite extends AnyFunSuite with BeforeAndAfterAll { var spark: SparkSession = _ override def beforeAll(): Unit = { - if (System.getenv("GAR_TEST_DATA") == null) { - throw new IllegalArgumentException("GAR_TEST_DATA is not set") + def resolveTestData(): String = { + var testDataPath: String = + Option(System.getenv("GAR_TEST_DATA")) + .orElse(Option(System.getProperty("gar.test.data"))) + .orNull + + if (testDataPath == null) { + val candidates = Seq("../../testing", "../testing", "testing") + candidates.foreach { p => + val dir = new java.io.File(p).getAbsoluteFile + val marker = + new java.io.File(dir, "ldbc_sample/csv/ldbc_sample.graph.yml") + if (dir.exists() && dir.isDirectory && marker.isFile) { + testDataPath = dir.getAbsolutePath + return testDataPath + } + } + } + + if (testDataPath != null) { + val dir = new java.io.File(testDataPath) + val marker = + new java.io.File(dir, "ldbc_sample/csv/ldbc_sample.graph.yml") + if (dir.exists() && dir.isDirectory && marker.isFile) { + return testDataPath + } + } + + throw new RuntimeException( + "GAR_TEST_DATA not found or invalid. 
" + + "Please set GAR_TEST_DATA environment variable to point to the testing directory " + + "or ensure the testing directory exists with ldbc_sample/csv/ldbc_sample.graph.yml" + ) } - testData = System.getenv("GAR_TEST_DATA") + + testData = resolveTestData() spark = SparkSession .builder() .enableHiveSupport()