Make compileTimeSchema() report columns in the runtime column order.

compileTimeSchema() now delegates to compileTimeSchemaImpl(schema(), T::class).
The implementation builds the schema from the class via getSchema(klass), then
records, for every column path in the runtime schema (recursing into Frame and
Group columns), that column's index within its parent. The class-derived schema
is then sorted by those indices at every nesting level.

A test checks that schema() and compileTimeSchema() render the same string for
a frame whose column order differs from the declaration order of its interface.

NOTE(review): a column present in the class-derived schema but absent from the
runtime schema has no entry in the order map, so its sort key is null. Confirm
where such columns are meant to be placed.

diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
index de2eef8419..88f93ffa17 100644
--- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
+++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
@@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api
 import org.jetbrains.kotlinx.dataframe.AnyFrame
 import org.jetbrains.kotlinx.dataframe.AnyRow
 import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.impl.api.compileTimeSchemaImpl
 import org.jetbrains.kotlinx.dataframe.impl.owner
 import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
-import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
 import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
 
 // region DataRow
@@ -26,5 +26,5 @@ public fun GroupBy<*, *>.schema(): DataFrameSchema = toDataFrame().schema()
 
 // endregion
 
-@Suppress("UnusedReceiverParameter")
-public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema = getSchema(T::class)
+public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema =
+    compileTimeSchemaImpl(schema(), T::class)
diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/schema.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/schema.kt
new file mode 100644
index 0000000000..6b80262891
--- /dev/null
+++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/schema.kt
@@ -0,0 +1,55 @@
+package org.jetbrains.kotlinx.dataframe.impl.api
+
+import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
+import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
+import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
+import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
+import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
+import kotlin.reflect.KClass
+
+@PublishedApi
+internal fun compileTimeSchemaImpl(runtimeSchema: DataFrameSchema, klass: KClass<*>): DataFrameSchema {
+    val compileSchema = getSchema(klass)
+    val root = ColumnPath(emptyList())
+    val order = buildMap {
+        putColumnsOrder(runtimeSchema, path = root)
+    }
+    return compileSchema.sortedBy(order, path = root)
+}
+
+internal fun MutableMap<ColumnPath, Int>.putColumnsOrder(schema: DataFrameSchema, path: ColumnPath) {
+    schema.columns.entries.forEachIndexed { i, (name, column) ->
+        val columnPath = path + name
+        this[columnPath] = i
+        when (column) {
+            is ColumnSchema.Frame -> {
+                putColumnsOrder(column.schema, columnPath)
+            }
+
+            is ColumnSchema.Group -> {
+                putColumnsOrder(column.schema, columnPath)
+            }
+        }
+    }
+}
+
+internal fun DataFrameSchema.sortedBy(order: Map<ColumnPath, Int>, path: ColumnPath): DataFrameSchema {
+    val sorted = columns.map { (name, column) ->
+        name to when (column) {
+            is ColumnSchema.Frame -> ColumnSchema.Frame(
+                column.schema.sortedBy(order, path + name),
+                column.nullable,
+                column.contentType,
+            )
+
+            is ColumnSchema.Group -> ColumnSchema.Group(column.schema.sortedBy(order, path + name), column.contentType)
+
+            is ColumnSchema.Value -> column
+
+            else -> TODO("unexpected ColumnSchema class ${column::class}")
+        }
+    }.sortedBy { (name, _) ->
+        order[path + name]
+    }.toMap()
+    return DataFrameSchemaImpl(sorted)
+}
diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
new file mode 100644
index 0000000000..f76ad6b27c
--- /dev/null
+++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
@@ -0,0 +1,26 @@
+package org.jetbrains.kotlinx.dataframe.api
+
+import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.DataRow
+import org.junit.Test
+
+class SchemaTests {
+    @Test
+    fun `columns order test`() {
+        val row = dataFrameOf("c", "b")(4, 5).first()
+        val df = dataFrameOf("abc", "a", "a123", "nested")(1, 2, 3, row).cast<Schema>()
+        df.schema().toString() shouldBe df.compileTimeSchema().toString()
+    }
+}
+
+private interface Schema {
+    val a: Int
+    val abc: Int
+    val a123: Int
+    val nested: DataRow<Nested>
+}
+
+private interface Nested {
+    val b: Int
+    val c: Int
+}