-
Notifications
You must be signed in to change notification settings - Fork 286
Expand file tree
/
Copy pathCaseClassSchemaProblem.scala
More file actions
57 lines (40 loc) · 1.49 KB
/
CaseClassSchemaProblem.scala
File metadata and controls
57 lines (40 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package sql
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
//
// ***NOTE***: this fails in Spark 1.3.0 with scala.MatchError
// It was written as an attempt to answer the following question:
// http://stackoverflow.com/questions/29310405/what-is-the-right-way-to-represent-an-any-type-in-spark-sql
//
// Filed as https://issues.apache.org/jira/browse/SPARK-6587 -- while I don't
// necessarily agree with the answer I have to concede that it's reasonable.
//
// Arguably, the error message has improved a bit in Spark 1.5.0.
//
// TODO: it may be interesting to see where the Dataset API takes this ...
//
/**
 * Runnable example that tries to build a DataFrame from a case class one of
 * whose fields is typed as an abstract base class with several concrete
 * subclasses.
 *
 * Per the notes at the top of this file, this is a known problem case for
 * Spark SQL schema inference (reported as SPARK-6587), so the program is
 * expected to fail rather than print a result.
 */
object CaseClassSchemaProblem {

  // Base type of the polymorphic field. It declares no members, so nothing
  // in the type itself describes columns for the field.
  // The hierarchy is intentionally left exactly as in the original
  // reproduction (not sealed, not final).
  private abstract class MyHolder

  private case class StringHolder(s: String) extends MyHolder

  private case class IntHolder(i: Int) extends MyHolder

  private case class BooleanHolder(b: Boolean) extends MyHolder

  // Row type: `foo` is declared with the abstract type, not a concrete holder.
  private case class Thing(key: Integer, foo: MyHolder)

  /**
   * Starts a local Spark session with four threads, converts a small
   * sequence of `Thing` values to a DataFrame, registers it as the temporary
   * view `things`, and prints the schema and contents of `SELECT * from things`.
   *
   * The session is always stopped on exit, including when one of the steps
   * throws; any such exception still propagates to the caller.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark =
      SparkSession.builder()
        .appName("SQL-CaseClassSchemaProblem")
        .master("local[4]")
        .getOrCreate()

    try {
      // Brings the implicit conversions that provide `toDF()` into scope.
      import spark.implicits._

      // One row per concrete holder subtype.
      val things = Seq(
        Thing(1, IntHolder(42)),
        Thing(2, StringHolder("hello")),
        Thing(3, BooleanHolder(false))
      )

      // NOTE(review): presumably this conversion is where the failure
      // described in the file header surfaces -- confirm against the Spark
      // version in use.
      val thingsDF = spark.sparkContext.parallelize(things, 4).toDF()
      thingsDF.createOrReplaceTempView("things")

      val all = spark.sql("SELECT * from things")
      all.printSchema()
      all.show()
    } finally {
      // Release the session's resources whether or not the demo failed.
      spark.stop()
    }
  }
}