Skip to content

Commit a636224

Browse files
committed
Fix the DateTimeParseException
1 parent 3a1e9b7 commit a636224

File tree

4 files changed

+181
-5
lines changed

4 files changed

+181
-5
lines changed

spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
*/
1919
package org.apache.sedona.sql.datasources.geopackage.transform
2020

21-
import java.time.{Instant, LocalDate}
21+
import java.time.{Instant, LocalDate, LocalDateTime, ZoneOffset}
2222
import java.time.format.DateTimeFormatter
23+
import java.time.format.DateTimeParseException
2324
import java.time.temporal.ChronoUnit
2425

2526
object DataTypesTransformations {
@@ -34,6 +35,48 @@ object DataTypesTransformations {
3435
}
3536

3637
/**
 * Converts a datetime string into milliseconds since the Unix epoch.
 *
 * The value is first parsed with `Instant.parse`, which handles strings that carry a zone
 * designator (for example a trailing `Z`). If that fails, the string is parsed as a local
 * datetime of the form `yyyy-MM-dd'T'HH:mm:ss` with an optional fraction of one to three
 * digits, and the result is interpreted as UTC.
 *
 * @param timestampStr the datetime text to convert
 * @return milliseconds since 1970-01-01T00:00:00Z
 * @throws IllegalArgumentException if the text matches neither supported form; the underlying
 *                                  `DateTimeParseException` is attached as the cause
 */
def epoch(timestampStr: String): Long = {
  import java.time.format.DateTimeFormatterBuilder
  import java.time.temporal.ChronoField

  try {
    // Fast path: the value already carries zone information.
    Instant.parse(timestampStr).toEpochMilli
  } catch {
    case _: DateTimeParseException =>
      // One formatter with an optional 1-3 digit fraction covers every zone-less variant in a
      // single parse attempt, so no pattern loop or mutable "last result" state is needed.
      val zonelessFormatter = new DateTimeFormatterBuilder()
        .appendPattern("yyyy-MM-dd'T'HH:mm:ss")
        .optionalStart()
        .appendFraction(ChronoField.NANO_OF_SECOND, 1, 3, true)
        .optionalEnd()
        .toFormatter()

      try {
        // No zone was supplied, so the local datetime is treated as UTC.
        LocalDateTime
          .parse(timestampStr, zonelessFormatter)
          .toInstant(ZoneOffset.UTC)
          .toEpochMilli
      } catch {
        case e: DateTimeParseException =>
          throw new IllegalArgumentException(
            s"Unable to parse datetime: $timestampStr. " +
              s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or 'yyyy-MM-ddTHH:mm:ss[.S]Z'",
            e)
      }
  }
}
3982
}

spark/spark-3.4/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
*/
1919
package org.apache.sedona.sql
2020

21-
import io.minio.{MakeBucketArgs, MinioClient, PutObjectArgs}
22-
import org.apache.spark.sql.{DataFrame, SparkSession}
21+
import io.minio.{MakeBucketArgs, MinioClient}
22+
import org.apache.spark.sql.DataFrame
2323
import org.apache.spark.sql.functions.expr
2424
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
25-
import org.apache.spark.sql.types.{BinaryType, BooleanType, DateType, DoubleType, IntegerType, StringType, StructField, StructType, TimestampType}
25+
import org.apache.spark.sql.types._
2626
import org.scalatest.matchers.should.Matchers
2727
import org.scalatest.prop.TableDrivenPropertyChecks._
2828
import org.testcontainers.containers.MinIOContainer
@@ -38,6 +38,7 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
3838
val path: String = resourceFolder + "geopackage/example.gpkg"
3939
val polygonsPath: String = resourceFolder + "geopackage/features.gpkg"
4040
val rasterPath: String = resourceFolder + "geopackage/raster.gpkg"
41+
val datetimePath: String = resourceFolder + "geopackage/test_datetime_issue.gpkg"
4142
val wktReader = new org.locationtech.jts.io.WKTReader()
4243
val wktWriter = new org.locationtech.jts.io.WKTWriter()
4344

@@ -168,6 +169,50 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
168169
df.count() shouldEqual expectedCount
169170
}
170171
}
172+
173+
it("should handle datetime fields without timezone information") {
  // Regression test for the DateTimeParseException raised when a GeoPackage file stores
  // datetime values without timezone information.
  val df = sparkSession.read
    .format("geopackage")
    .option("tableName", "test_features")
    .load(datetimePath)

  // Both datetime columns must be exposed as TimestampType. Looking the fields up by name on
  // the schema reports the missing column on failure instead of an opaque "None.get".
  df.schema("created_at").dataType shouldEqual TimestampType
  df.schema("updated_at").dataType shouldEqual TimestampType

  // Collect once: any parse failure propagates out of collect() and fails the test, so a
  // separate "no exception" pass over the same projection is not needed.
  val datetimeValues = df.select("created_at", "updated_at").collect()
  datetimeValues should not be empty

  // Every row must carry a non-null timestamp later than the Unix epoch.
  datetimeValues.foreach { row =>
    val createdTimestamp = row.getAs[Timestamp]("created_at")
    val updatedTimestamp = row.getAs[Timestamp]("updated_at")
    createdTimestamp should not be null
    updatedTimestamp should not be null
    createdTimestamp.getTime should be > 0L
    updatedTimestamp.getTime should be > 0L
  }

  // Reading the metadata view of the same file must not fail on its last_change column.
  noException should be thrownBy {
    val metadataDf = sparkSession.read
      .format("geopackage")
      .option("showMetadata", "true")
      .load(datetimePath)
    metadataDf.select("last_change").collect()
  }
}
171216
}
172217

173218
describe("GeoPackage Raster Data Test") {

spark/spark-3.5/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,50 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
168168
df.count() shouldEqual expectedCount
169169
}
170170
}
171+
172+
it("should handle datetime fields without timezone information") {
  // Regression test for the DateTimeParseException raised when a GeoPackage file stores
  // datetime values without timezone information.
  val testFilePath = resourceFolder + "geopackage/test_datetime_issue.gpkg"

  val df = sparkSession.read
    .format("geopackage")
    .option("tableName", "test_features")
    .load(testFilePath)

  // Both datetime columns must be exposed as TimestampType. Looking the fields up by name on
  // the schema reports the missing column on failure instead of an opaque "None.get".
  df.schema("created_at").dataType shouldEqual TimestampType
  df.schema("updated_at").dataType shouldEqual TimestampType

  // Collect once: any parse failure propagates out of collect() and fails the test, so a
  // separate "no exception" pass over the same projection is not needed.
  val datetimeValues = df.select("created_at", "updated_at").collect()
  datetimeValues should not be empty

  // Every row must carry a non-null timestamp later than the Unix epoch.
  datetimeValues.foreach { row =>
    val createdTimestamp = row.getAs[Timestamp]("created_at")
    val updatedTimestamp = row.getAs[Timestamp]("updated_at")
    createdTimestamp should not be null
    updatedTimestamp should not be null
    createdTimestamp.getTime should be > 0L
    updatedTimestamp.getTime should be > 0L
  }

  // Reading the metadata view of the same file must not fail on its last_change column.
  noException should be thrownBy {
    val metadataDf = sparkSession.read
      .format("geopackage")
      .option("showMetadata", "true")
      .load(testFilePath)
    metadataDf.select("last_change").collect()
  }
}
171215
}
172216

173217
describe("GeoPackage Raster Data Test") {

spark/spark-4.0/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,50 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
168168
df.count() shouldEqual expectedCount
169169
}
170170
}
171+
172+
it("should handle datetime fields without timezone information") {
  // Regression test for the DateTimeParseException raised when a GeoPackage file stores
  // datetime values without timezone information.
  val testFilePath = resourceFolder + "geopackage/test_datetime_issue.gpkg"

  val df = sparkSession.read
    .format("geopackage")
    .option("tableName", "test_features")
    .load(testFilePath)

  // Both datetime columns must be exposed as TimestampType. Looking the fields up by name on
  // the schema reports the missing column on failure instead of an opaque "None.get".
  df.schema("created_at").dataType shouldEqual TimestampType
  df.schema("updated_at").dataType shouldEqual TimestampType

  // Collect once: any parse failure propagates out of collect() and fails the test, so a
  // separate "no exception" pass over the same projection is not needed.
  val datetimeValues = df.select("created_at", "updated_at").collect()
  datetimeValues should not be empty

  // Every row must carry a non-null timestamp later than the Unix epoch.
  datetimeValues.foreach { row =>
    val createdTimestamp = row.getAs[Timestamp]("created_at")
    val updatedTimestamp = row.getAs[Timestamp]("updated_at")
    createdTimestamp should not be null
    updatedTimestamp should not be null
    createdTimestamp.getTime should be > 0L
    updatedTimestamp.getTime should be > 0L
  }

  // Reading the metadata view of the same file must not fail on its last_change column.
  noException should be thrownBy {
    val metadataDf = sparkSession.read
      .format("geopackage")
      .option("showMetadata", "true")
      .load(testFilePath)
    metadataDf.select("last_change").collect()
  }
}
171215
}
172216

173217
describe("GeoPackage Raster Data Test") {

0 commit comments

Comments
 (0)