@@ -127,13 +127,13 @@ class Reader2Doc(override val uid: String)
127
127
128
128
override def transform (dataset : Dataset [_]): DataFrame = {
129
129
validateRequiredParameters()
130
- val structuredDf = if ($(contentType).trim.isEmpty) {
130
+ val structuredDf = if ($(contentType).trim.isEmpty && getInputCol.trim.isEmpty ) {
131
131
val partitionParams = Map (
132
132
" inferTableStructure" -> $(inferTableStructure).toString,
133
133
" outputFormat" -> $(outputFormat))
134
134
partitionMixedContent(dataset, $(contentPath), partitionParams)
135
135
} else {
136
- partitionContent(partitionBuilder, $(contentPath), isStringContent($(contentType) ), dataset)
136
+ partitionContent(partitionBuilder, $(contentPath), isStringContent(getContentType ), dataset)
137
137
}
138
138
if (! structuredDf.isEmpty) {
139
139
val annotatedDf = structuredDf
@@ -149,7 +149,7 @@ class Reader2Doc(override val uid: String)
149
149
150
150
protected def partitionBuilder : Partition = {
151
151
val params = Map (
152
- " contentType" -> $(contentType) ,
152
+ " contentType" -> getContentType ,
153
153
" storeContent" -> $(storeContent).toString,
154
154
" titleFontSize" -> $(titleFontSize).toString,
155
155
" inferTableStructure" -> $(inferTableStructure).toString,
@@ -186,15 +186,16 @@ class Reader2Doc(override val uid: String)
186
186
}
187
187
188
188
protected def validateRequiredParameters (): Unit = {
189
- require(
190
- $(contentPath) != null && $(contentPath).trim.nonEmpty,
191
- " contentPath must be set and not empty" )
189
+ val hasContentPath = $(contentPath) != null && $(contentPath).trim.nonEmpty
190
+ if (hasContentPath) {
191
+ require(
192
+ ResourceHelper .validFile($(contentPath)),
193
+ " contentPath must point to a valid file or directory" )
194
+ }
195
+
192
196
require(
193
197
$(outputFormat) == " plain-text" ,
194
198
" Only 'plain-text' outputFormat is supported for this operation." )
195
- require(
196
- ResourceHelper .validFile($(contentPath)),
197
- " contentPath must point to a valid file or directory" )
198
199
}
199
200
200
201
protected def partitionToAnnotation : UserDefinedFunction = udf {
0 commit comments