Skip to content

Commit 886af8b

Browse files
add Path overloads for io methods
1 parent 57a8f47 commit 886af8b

File tree

7 files changed

+131
-6
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io
  • dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io
  • dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io
  • dataframe-geo/src/main/kotlin/org/jetbrains/kotlinx/dataframe/geo/io
  • dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io

7 files changed

+131
-6
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import java.io.File
1616
import java.io.FileNotFoundException
1717
import java.io.InputStream
1818
import java.net.URL
19+
import java.nio.file.Path
1920
import java.util.ServiceLoader
2021
import kotlin.reflect.KType
2122

@@ -57,6 +58,13 @@ public interface SupportedDataFrameFormat : SupportedFormat {
5758
/** Reads a [DataFrame] from [stream]; [header] defaults to an empty list. */
public fun readDataFrame(stream: InputStream, header: List<String> = emptyList()): DataFrame<*>
5859

5960
/** Reads a [DataFrame] from [file]; [header] defaults to an empty list. */
public fun readDataFrame(file: File, header: List<String> = emptyList()): DataFrame<*>
61+
62+
/**
 * Reads a DataFrame from the file located at [path].
 *
 * The default implementation converts [path] to a [File] and forwards to the [File] overload,
 * so implementors do not have to override this method.
 *
 * @param path location of the file to read.
 * @param header passed through unchanged to the [File] overload.
 */
public fun readDataFrame(path: Path, header: List<String> = emptyList()): DataFrame<*> {
    val file = path.toFile()
    return readDataFrame(file, header)
}
6068
}
6169

6270
/**
@@ -293,3 +301,16 @@ public fun URL.readDataRow(header: List<String> = emptyList()): AnyRow = DataRow
293301
/** Reads this [File] into a DataFrame by delegating to [DataFrame.read]. */
public fun File.readDataFrame(header: List<String> = emptyList()): AnyFrame {
    return DataFrame.read(this, header)
}
294302

295303
/** Reads this [File] into a single row by delegating to [DataRow.read]. */
public fun File.readDataRow(header: List<String> = emptyList()): AnyRow {
    return DataRow.read(this, header)
}
304+
305+
// Path-based overloads and extensions
306+
/** [Path] overload of `DataFrame.read`: converts [path] to a [File] and delegates to the [File] overload. */
public fun DataFrame.Companion.read(path: Path, header: List<String> = emptyList()): AnyFrame {
    val file = path.toFile()
    return read(file, header)
}
308+
309+
/**
 * [Path] overload of `DataRow.read`: reads [path] as a DataFrame and returns the result of
 * calling `single()` on it.
 */
public fun DataRow.Companion.read(path: Path, header: List<String> = emptyList()): AnyRow {
    val frame = DataFrame.read(path, header)
    return frame.single()
}
311+
312+
/** Reads the file at this [Path] into a DataFrame by delegating to [DataFrame.read]. */
public fun Path.readDataFrame(header: List<String> = emptyList()): AnyFrame {
    return DataFrame.read(this, header)
}
314+
315+
/** Reads the file at this [Path] into a single row by delegating to [DataRow.read]. */
public fun Path.readDataRow(header: List<String> = emptyList()): AnyRow {
    return DataRow.read(this, header)
}

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowWriter.kt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import java.io.FileOutputStream
1313
import java.io.OutputStream
1414
import java.nio.channels.Channels
1515
import java.nio.channels.WritableByteChannel
16+
import java.nio.file.Files
17+
import java.nio.file.Path
1618

1719
/** Mismatch handler that does nothing with the [ConvertingMismatch] it receives. */
public val ignoreMismatchMessage: (ConvertingMismatch) -> Unit = { _ -> }
1820
public val writeMismatchMessage: (ConvertingMismatch) -> Unit = { message: ConvertingMismatch ->
@@ -96,6 +98,16 @@ public interface ArrowWriter : AutoCloseable {
9698
writeArrowIPC(FileOutputStream(file, append))
9799
}
98100

101+
/**
 * Path overload for Arrow IPC writing.
 *
 * Opens the file at [path], hands the stream to the [OutputStream] overload and closes the
 * stream afterwards in both modes, including when writing fails.
 *
 * @param path file to write to; it is created if it does not exist.
 * @param append when `true`, output is added to the end of an existing file; when `false`,
 * any existing content is truncated first.
 */
public fun writeArrowIPC(path: Path, append: Boolean = true) {
    val stream =
        if (append) {
            // CREATE + APPEND mirrors FileOutputStream(file, true) without converting to File,
            // so paths from non-default file systems work as well.
            Files.newOutputStream(
                path,
                java.nio.file.StandardOpenOption.CREATE,
                java.nio.file.StandardOpenOption.APPEND,
            )
        } else {
            // Without options this creates the file or truncates an existing one.
            Files.newOutputStream(path)
        }
    stream.use { os -> writeArrowIPC(os) }
}
110+
99111
/**
100112
* Save data to [Arrow interprocess streaming format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-streaming-format), write to new [ByteArray]
101113
*/
@@ -133,6 +145,11 @@ public interface ArrowWriter : AutoCloseable {
133145
writeArrowFeather(FileOutputStream(file))
134146
}
135147

148+
/**
 * Path overload for Arrow Feather writing.
 *
 * Opens an output stream on [path], passes it to the [OutputStream] overload and closes it afterwards.
 */
public fun writeArrowFeather(path: Path) {
    val stream = Files.newOutputStream(path)
    stream.use { writeArrowFeather(it) }
}
152+
136153
/**
137154
* Save data to [Arrow random access format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-random-access-files), write to new [ByteArray]
138155
*/

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ public fun DataFrame.Companion.readArrowIPC(
7777
nullability: NullabilityOptions = NullabilityOptions.Infer,
7878
): AnyFrame = Files.newByteChannel(file.toPath()).use { readArrowIPC(it, nullability = nullability) }
7979

80+
/**
 * Path overload for reading Arrow IPC data.
 *
 * Opens a byte channel on [path], delegates to the channel-based `readArrowIPC` and closes the
 * channel when reading finishes.
 */
public fun DataFrame.Companion.readArrowIPC(
    path: Path,
    nullability: NullabilityOptions = NullabilityOptions.Infer,
): AnyFrame {
    val channel = Files.newByteChannel(path)
    return channel.use { opened -> readArrowIPC(opened, nullability = nullability) }
}
85+
8086
/**
8187
* Read [Arrow interprocess streaming format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-streaming-format) data from existing [byteArray]
8288
*/
@@ -130,6 +136,12 @@ public fun DataFrame.Companion.readArrowFeather(
130136
nullability: NullabilityOptions = NullabilityOptions.Infer,
131137
): AnyFrame = Files.newByteChannel(file.toPath()).use { readArrowFeather(it, nullability = nullability) }
132138

139+
/**
 * Path overload for reading Arrow Feather data.
 *
 * Opens a byte channel on [path], delegates to the channel-based `readArrowFeather` and closes
 * the channel when reading finishes.
 */
public fun DataFrame.Companion.readArrowFeather(
    path: Path,
    nullability: NullabilityOptions = NullabilityOptions.Infer,
): AnyFrame {
    val channel = Files.newByteChannel(path)
    return channel.use { opened -> readArrowFeather(opened, nullability = nullability) }
}
144+
133145
/**
134146
* Read [Arrow random access format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-random-access-files) data from existing [byteArray]
135147
*/

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowWriting.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import org.jetbrains.kotlinx.dataframe.AnyFrame
55
import java.io.File
66
import java.io.OutputStream
77
import java.nio.channels.WritableByteChannel
8+
import java.nio.file.Path
89

910
/**
1011
* Create [ArrowWriter] for [this] DataFrame with target schema matching actual data
@@ -51,6 +52,13 @@ public fun AnyFrame.writeArrowIPC(file: File, append: Boolean = true) {
5152
}
5253
}
5354

55+
/**
 * Path overload for IPC writing.
 *
 * Creates an [ArrowWriter] for this frame, writes to [path] with the given [append] flag and
 * closes the writer afterwards.
 */
public fun AnyFrame.writeArrowIPC(path: Path, append: Boolean = true) {
    val writer = arrowWriter()
    writer.use { it.writeArrowIPC(path, append) }
}
61+
5462
/**
5563
* Save data to [Arrow interprocess streaming format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-streaming-format), write to new [ByteArray]
5664
*/
@@ -89,6 +97,13 @@ public fun AnyFrame.writeArrowFeather(file: File) {
8997
}
9098
}
9199

100+
/**
 * Path overload for Feather writing.
 *
 * Creates an [ArrowWriter] for this frame, writes to [path] and closes the writer afterwards.
 */
public fun AnyFrame.writeArrowFeather(path: Path) {
    val writer = arrowWriter()
    writer.use { it.writeArrowFeather(path) }
}
106+
92107
/**
93108
* Save data to [Arrow random access format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-random-access-files), write to new [ByteArray]
94109
*/

dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ import java.io.InputStream
3939
import java.io.OutputStream
4040
import java.net.URL
4141
import java.nio.file.Files
42+
import java.nio.file.Path
43+
import kotlin.io.path.outputStream
44+
import kotlin.io.path.inputStream
4245
import java.util.Calendar
4346
import java.time.LocalDate as JavaLocalDate
4447
import java.time.LocalDateTime as JavaLocalDateTime
@@ -627,24 +630,38 @@ public fun <T> DataFrame<T>.writeExcel(
627630
writeHeader: Boolean = true,
628631
workBookType: WorkBookType = WorkBookType.XLSX,
629632
keepFile: Boolean = false,
633+
): Unit = writeExcel(
634+
path = file.toPath(),
635+
columnsSelector = columnsSelector,
636+
sheetName = sheetName,
637+
writeHeader = writeHeader,
638+
workBookType = workBookType,
639+
keepFile = keepFile,
640+
)
641+
642+
/**
 * Path overload for writing this DataFrame to an Excel file.
 *
 * When [keepFile] is `true` and a non-empty file already exists at [path], that workbook is
 * loaded and handed to the [OutputStream] overload; otherwise a new workbook of [workBookType]
 * is created. The result is then written to [path].
 *
 * @param path file to write to.
 * @param columnsSelector forwarded to the [OutputStream] overload; defaults to all columns.
 * @param sheetName forwarded to the [OutputStream] overload.
 * @param writeHeader forwarded to the [OutputStream] overload.
 * @param workBookType workbook flavour used both for loading an existing file and for creating a new one.
 * @param keepFile whether an existing non-empty file should be loaded instead of starting from a new workbook.
 */
public fun <T> DataFrame<T>.writeExcel(
    path: Path,
    columnsSelector: ColumnsSelector<T, *> = { all() },
    sheetName: String? = null,
    writeHeader: Boolean = true,
    workBookType: WorkBookType = WorkBookType.XLSX,
    keepFile: Boolean = false,
) {
    val factory =
        if (keepFile && Files.exists(path) && Files.size(path) > 0L) {
            // Write to an existing file with `keepFile` flag.
            // The workbook is loaded from the stream up front, so the stream is closed here,
            // before the same file is reopened for writing below.
            path.inputStream().use { fis ->
                when (workBookType) {
                    WorkBookType.XLS -> HSSFWorkbook(fis)
                    WorkBookType.XLSX -> XSSFWorkbook(fis)
                }
            }
        } else {
            when (workBookType) {
                WorkBookType.XLS -> HSSFWorkbook()
                // Use streaming mode for a new XLSX file
                WorkBookType.XLSX -> SXSSFWorkbook()
            }
        }
    path.outputStream().use {
        writeExcel(it, columnsSelector, sheetName, writeHeader, factory)
    }
}

dataframe-geo/src/main/kotlin/org/jetbrains/kotlinx/dataframe/geo/io/write.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,19 @@ import org.geotools.geojson.feature.FeatureJSON
88
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
99
import org.jetbrains.kotlinx.dataframe.geo.geotools.toSimpleFeatureCollection
1010
import java.io.File
11+
import java.nio.file.Files
12+
import java.nio.file.Path
1113

1214
/** Writes GeoJSON to the file named by [path] by delegating to the [File] overload. */
fun GeoDataFrame<*>.writeGeoJson(path: String) {
    writeGeoJson(File(path))
}
1315

16+
/**
 * Path overload for writing GeoJSON.
 *
 * Opens an output stream on [path], writes the feature collection produced by
 * `toSimpleFeatureCollection()` with a [FeatureJSON] writer and closes the stream afterwards.
 */
fun GeoDataFrame<*>.writeGeoJson(path: Path) {
    val writer = FeatureJSON()
    val stream = Files.newOutputStream(path)
    stream.use { writer.writeFeatureCollection(toSimpleFeatureCollection(), it) }
}
23+
1424
fun GeoDataFrame<*>.writeGeoJson(file: File) {
1525
// TODO: this adds ids that break the order of reading
1626
val featureJSON = FeatureJSON()
@@ -21,6 +31,11 @@ fun GeoDataFrame<*>.writeGeoJson(file: File) {
2131

2232
/** Writes a Shapefile into the directory named by [directoryPath] by delegating to the [File] overload. */
fun GeoDataFrame<*>.writeShapefile(directoryPath: String) {
    writeShapefile(File(directoryPath))
}
2333

34+
/** Path overload for writing a Shapefile: converts [directory] to a [File] and delegates to the [File] overload. */
fun GeoDataFrame<*>.writeShapefile(directory: Path): Unit = writeShapefile(directory.toFile())
38+
2439
fun GeoDataFrame<*>.writeShapefile(directory: File) {
2540
if (!directory.exists()) {
2641
directory.mkdirs()

dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_C
2929
import java.io.File
3030
import java.io.InputStream
3131
import java.net.URL
32+
import java.nio.file.Path
33+
import kotlin.io.path.writeText
3234
import kotlin.reflect.typeOf
3335

3436
public class JSON(
@@ -147,6 +149,15 @@ public fun DataFrame.Companion.readJson(
147149
unifyNumbers: Boolean = true,
148150
): AnyFrame = DataFrame.readJson(file.toURI().toURL(), header, keyValuePaths, typeClashTactic, unifyNumbers)
149151

152+
/**
 * Path overload for reading JSON into a DataFrame.
 *
 * Converts [path] to a [URL] and delegates to the [URL] overload, forwarding all other
 * arguments unchanged.
 */
public fun DataFrame.Companion.readJson(
    path: Path,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    val url = path.toUri().toURL()
    return DataFrame.readJson(url, header, keyValuePaths, typeClashTactic, unifyNumbers)
}
160+
150161
/**
151162
* @param file Where to fetch the Json as [InputStream] to be converted to a [DataRow].
152163
* @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
@@ -164,6 +175,15 @@ public fun DataRow.Companion.readJson(
164175
unifyNumbers: Boolean = true,
165176
): AnyRow = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic, unifyNumbers).single()
166177

178+
/**
 * Path overload for reading JSON into a DataRow.
 *
 * Reads [path] with the [Path] overload of `DataFrame.readJson` and returns the result of
 * calling `single()` on the resulting frame.
 */
public fun DataRow.Companion.readJson(
    path: Path,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJson(path, header, keyValuePaths, typeClashTactic, unifyNumbers)
    return frame.single()
}
186+
167187
/**
168188
* @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataFrame].
169189
* @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
@@ -412,6 +432,10 @@ public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false) {
412432
file.writeText(toJson(prettyPrint))
413433
}
414434

435+
/** Serializes this frame with `toJson(prettyPrint)` and writes the resulting text to [path]. */
public fun AnyFrame.writeJson(path: Path, prettyPrint: Boolean = false) {
    val json = toJson(prettyPrint)
    path.writeText(json)
}
438+
415439
/** Writes this frame as JSON to the file named by [path] by delegating to the [File] overload. */
public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false) {
    writeJson(File(path), prettyPrint)
}
416440

417441
public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false) {
@@ -422,6 +446,10 @@ public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false) {
422446
file.writeText(toJson(prettyPrint))
423447
}
424448

449+
/** Serializes this row with `toJson(prettyPrint)` and writes the resulting text to [path]. */
public fun AnyRow.writeJson(path: Path, prettyPrint: Boolean = false) {
    val json = toJson(prettyPrint)
    path.writeText(json)
}
452+
425453
/** Writes this row as JSON to the file named by [path] by delegating to the [File] overload. */
public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false): Unit =
    writeJson(File(path), prettyPrint)

0 commit comments

Comments
 (0)