From e46883bca40929b4a2c59f2c457158afde044b21 Mon Sep 17 00:00:00 2001
From: Kopilov Aleksandr
Date: Mon, 2 Sep 2024 17:39:57 +0300
Subject: [PATCH] Allow reading ViewVarCharVector and ViewVarBinaryVector from Arrow files

---
Reviewer notes (this area is ignored when the patch is applied):
- Adds `values(range)` readers for ViewVarCharVector (decoded with
  `String(get(it))`) and ViewVarBinaryVector (raw `get(it)`); null slots
  map to null. Both are wired into the `when` in readField.
- LargeVarCharVector.values is moved, unchanged, to sit next to the other
  string readers.
- Bumps arrow 15.0.0 -> 17.0.0; presumably the View* vector classes need
  the newer release -- TODO confirm against the Arrow release notes.
- The second arrowReadingImpl.kt hunk adds two consecutive blank lines after
  ViewVarCharVector.values; ktlint (no-consecutive-blank-lines) would flag it.

 .../kotlinx/dataframe/io/arrowReadingImpl.kt | 29 +++++++++++++++++--
 gradle/libs.versions.toml                    |  2 +-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt
index 20c924f6ec..5854e20b3a 100644
--- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt
+++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt
@@ -31,6 +31,8 @@ import org.apache.arrow.vector.UInt8Vector
 import org.apache.arrow.vector.VarBinaryVector
 import org.apache.arrow.vector.VarCharVector
 import org.apache.arrow.vector.VectorSchemaRoot
+import org.apache.arrow.vector.ViewVarBinaryVector
+import org.apache.arrow.vector.ViewVarCharVector
 import org.apache.arrow.vector.complex.StructVector
 import org.apache.arrow.vector.ipc.ArrowFileReader
 import org.apache.arrow.vector.ipc.ArrowReader
@@ -207,6 +209,25 @@ private fun VarCharVector.values(range: IntRange): List<String?> =
         }
     }
 
+private fun LargeVarCharVector.values(range: IntRange): List<String?> =
+    range.map {
+        if (isNull(it)) {
+            null
+        } else {
+            String(get(it))
+        }
+    }
+
+private fun ViewVarCharVector.values(range: IntRange): List<String?> =
+    range.map {
+        if (isNull(it)) {
+            null
+        } else {
+            String(get(it))
+        }
+    }
+
+
 private fun VarBinaryVector.values(range: IntRange): List<ByteArray?> =
     range.map {
         if (isNull(it)) {
@@ -225,12 +246,12 @@ private fun LargeVarBinaryVector.values(range: IntRange): List<ByteArray?> =
         }
     }
 
-private fun LargeVarCharVector.values(range: IntRange): List<String?> =
+private fun ViewVarBinaryVector.values(range: IntRange): List<ByteArray?> =
     range.map {
         if (isNull(it)) {
             null
         } else {
-            String(get(it))
+            get(it)
         }
     }
 
@@ -266,10 +287,14 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi
 
             is LargeVarCharVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
 
+            is ViewVarCharVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
+
             is VarBinaryVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
 
             is LargeVarBinaryVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
 
+            is ViewVarBinaryVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
+
             is BitVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
 
             is SmallIntVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index b14609930b..85e8a9517b 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -48,7 +48,7 @@ junit-platform = "1.10.2"
 kotestAsserions = "5.5.4"
 
 jsoup = "1.17.2"
-arrow = "15.0.0"
+arrow = "17.0.0"
 docProcessor = "0.3.10"
 simpleGit = "2.0.3"
 dependencyVersions = "0.51.0"