Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions native/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions native/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ inherits = "release"
lto = false # Skip LTO for faster linking
codegen-units = 16 # Parallel codegen (faster compile, slightly larger binary)
# overflow-checks inherited as false from release

[patch.crates-io]
datafusion-spark = { git = "https://github.com/unknowntpo/datafusion", branch = "feat-str-to-map-52" }

2 changes: 2 additions & 0 deletions native/core/src/execution/jni_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use datafusion_spark::function::hash::crc32::SparkCrc32;
use datafusion_spark::function::hash::sha1::SparkSha1;
use datafusion_spark::function::hash::sha2::SparkSha2;
use datafusion_spark::function::map::map_from_entries::MapFromEntries;
use datafusion_spark::function::map::str_to_map::SparkStrToMap;
use datafusion_spark::function::math::expm1::SparkExpm1;
use datafusion_spark::function::math::hex::SparkHex;
use datafusion_spark::function::math::width_bucket::SparkWidthBucket;
Expand Down Expand Up @@ -404,6 +405,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkCrc32::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSpace::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitCount::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkStrToMap::default()));
}

/// Prepares arrow arrays for output.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ object QueryPlanSerde extends Logging with CometExprShim {
classOf[MapValues] -> CometMapValues,
classOf[MapFromArrays] -> CometMapFromArrays,
classOf[MapContainsKey] -> CometMapContainsKey,
classOf[MapFromEntries] -> CometMapFromEntries)
classOf[MapFromEntries] -> CometMapFromEntries,
classOf[StringToMap] -> CometStrToMap)

private val structExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map(
classOf[CreateNamedStruct] -> CometCreateNamedStruct,
Expand Down
7 changes: 7 additions & 0 deletions spark/src/main/scala/org/apache/comet/serde/maps.scala
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,10 @@ object CometMapFromEntries extends CometScalarFunction[MapFromEntries]("map_from
Compatible(None)
}
}

/**
 * Serde object for Spark's `StringToMap` expression, mapped to the native
 * `str_to_map` scalar UDF registered on the DataFusion session.
 */
object CometStrToMap extends CometScalarFunction[StringToMap]("str_to_map") {

  /** All forms of `str_to_map` are delegated to the native implementation. */
  override def getSupportLevel(expr: StringToMap): SupportLevel = Compatible(None)
}
81 changes: 81 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/map/str_to_map.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- Tests for Spark-compatible str_to_map function
-- https://spark.apache.org/docs/latest/api/sql/index.html#str_to_map
--
-- str_to_map(text[, pairDelim[, keyValueDelim]]) — pairDelim defaults to ','
-- and keyValueDelim defaults to ':' (see s0 vs. the custom-delimiter cases).
--
-- Test cases derived from Spark test("StringToMap"):
-- https://github.com/apache/spark/blob/v4.0.0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala#L525-L618

-- ConfigMatrix: parquet.enable.dictionary=false,true

-- s0: Basic test with default delimiters
query spark_answer_only
SELECT str_to_map('a:1,b:2,c:3')

-- s1: Preserve spaces in values
query spark_answer_only
SELECT str_to_map('a: ,b:2')

-- s2: Custom key-value delimiter '='
query spark_answer_only
SELECT str_to_map('a=1,b=2,c=3', ',', '=')

-- s3: Empty string returns map with empty key and NULL value
query spark_answer_only
SELECT str_to_map('', ',', '=')

-- s4: Custom pair delimiter '_'
query spark_answer_only
SELECT str_to_map('a:1_b:2_c:3', '_', ':')

-- s5: Single key without value returns NULL value
query spark_answer_only
SELECT str_to_map('a')

-- s6: Custom delimiters '&' and '='
query spark_answer_only
SELECT str_to_map('a=1&b=2&c=3', '&', '=')

-- Duplicate keys: EXCEPTION policy (Spark 3.0+ default)
-- TODO: Add LAST_WIN policy tests when spark.sql.mapKeyDedupPolicy config is supported
-- query spark_answer_only
-- SELECT str_to_map('a:1,b:2,a:3')

-- s7: NULL input returns NULL
query spark_answer_only
SELECT str_to_map(NULL, ',', ':')

-- s8: Explicit 3-arg form (same delimiters as the defaults)
query spark_answer_only
SELECT str_to_map('a:1,b:2,c:3', ',', ':')

-- s9: Missing key-value delimiter results in NULL value
query spark_answer_only
SELECT str_to_map('a,b:2', ',', ':')

-- s10: Multi-row test (includes a NULL row)
query spark_answer_only
SELECT str_to_map(col) FROM (VALUES ('a:1,b:2'), ('x:9'), (NULL)) AS t(col)

-- s11: Multi-row with custom delimiter
query spark_answer_only
SELECT str_to_map(col, ',', '=') FROM (VALUES ('a=1,b=2'), ('x=9'), (NULL)) AS t(col)

-- s12: Per-row delimiters: each row can have different delimiters
query spark_answer_only
SELECT str_to_map(col1, col2, col3) FROM (VALUES ('a=1,b=2', ',', '='), ('x#9', ',', '#'), (NULL, ',', '=')) AS t(col1, col2, col3)
Loading