Skip to content

Commit

Permalink
Merge pull request #990 from Kotlin/compileTimeSchemaOrder
Browse files Browse the repository at this point in the history
Sort df.compileTimeSchema() columns according to df.schema() so they're easier to compare
  • Loading branch information
koperagen authored Jan 29, 2025
2 parents 651cbae + 791dfb7 commit 72354c5
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 3 deletions.
4 changes: 4 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -9799,6 +9799,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/MapKt {
public static final fun mapNotNullValues (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
public static final fun compileTimeSchemaImpl (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lkotlin/reflect/KClass;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.impl.api.compileTimeSchemaImpl
import org.jetbrains.kotlinx.dataframe.impl.owner
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema

// region DataRow
Expand All @@ -26,5 +26,5 @@ public fun GroupBy<*, *>.schema(): DataFrameSchema = toDataFrame().schema()

// endregion

@Suppress("UnusedReceiverParameter")
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema = getSchema(T::class)
/**
 * Returns the schema associated with the compile-time type [T] of this [DataFrame].
 *
 * Delegates to [compileTimeSchemaImpl], passing this frame's runtime [schema] together with
 * `T::class`; the runtime schema is supplied so that the columns of the result can be ordered
 * like the columns of [schema], which makes the two schemas easier to compare.
 */
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema =
compileTimeSchemaImpl(schema(), T::class)
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.jetbrains.kotlinx.dataframe.impl.api

import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import kotlin.reflect.KClass

/**
 * Builds the schema for [klass] via [getSchema] and reorders its columns to follow [runtimeSchema].
 *
 * @param runtimeSchema schema whose column order (at every nesting level) is used as the reference
 * @param klass class the compile-time schema is extracted from
 * @return the schema of [klass] with columns sorted by their position in [runtimeSchema]
 */
@PublishedApi
internal fun compileTimeSchemaImpl(runtimeSchema: DataFrameSchema, klass: KClass<*>): DataFrameSchema {
    // Extract the compile-time schema first, then index the runtime column positions.
    val declaredSchema = getSchema(klass)
    val rootPath = ColumnPath(emptyList())

    val positions = mutableMapOf<ColumnPath, Int>()
    positions.putColumnsOrder(runtimeSchema, path = rootPath)

    return declaredSchema.sortedBy(positions, path = rootPath)
}

/**
 * Records, for every column of [schema], its index among its siblings, keyed by the column's full path.
 *
 * Frame and group columns are descended into recursively, so nested columns are recorded as well,
 * each with an index relative to its own parent schema.
 *
 * @param schema schema whose columns are indexed
 * @param path path of [schema] itself; column names are appended to it to form the map keys
 */
internal fun MutableMap<ColumnPath, Int>.putColumnsOrder(schema: DataFrameSchema, path: ColumnPath) {
    for ((index, entry) in schema.columns.entries.withIndex()) {
        val columnPath = path + entry.key
        put(columnPath, index)

        // Only frame and group columns carry a nested schema; everything else is a leaf here.
        val nestedSchema = when (val column = entry.value) {
            is ColumnSchema.Frame -> column.schema
            is ColumnSchema.Group -> column.schema
            else -> null
        }
        if (nestedSchema != null) {
            putColumnsOrder(nestedSchema, columnPath)
        }
    }
}

/**
 * Returns a new schema with the same columns as this one, sorted by the positions stored in [order].
 *
 * The nested schemas of frame and group columns are rebuilt recursively with the same rule.
 * A column whose path is not present in [order] produces a `null` sort key; the standard library
 * `sortedBy` places `null` keys before non-null ones and keeps equal keys in their existing order.
 *
 * @param order position of each column, keyed by the column's full path
 * @param path path of this schema; column names are appended to it to look up [order]
 * @return a [DataFrameSchemaImpl] holding the reordered columns
 */
internal fun DataFrameSchema.sortedBy(order: Map<ColumnPath, Int>, path: ColumnPath): DataFrameSchema {
    val rebuilt = mutableListOf<Pair<String, ColumnSchema>>()
    for ((name, column) in columns) {
        val reordered: ColumnSchema = when (column) {
            is ColumnSchema.Frame -> {
                val nested = column.schema.sortedBy(order, path + name)
                ColumnSchema.Frame(nested, column.nullable, column.contentType)
            }

            is ColumnSchema.Group -> {
                val nested = column.schema.sortedBy(order, path + name)
                ColumnSchema.Group(nested, column.contentType)
            }

            // Value columns have nothing nested to reorder.
            is ColumnSchema.Value -> column

            else -> TODO("unexpected ColumnSchema class ${column::class}")
        }
        rebuilt += name to reordered
    }

    val byPosition = rebuilt.sortedBy { (name, _) -> order[path + name] }
    return DataFrameSchemaImpl(byPosition.toMap())
}
26 changes: 26 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataRow
import org.junit.Test

class SchemaTests {
    /**
     * Checks that `compileTimeSchema()` lists columns in the same order as the runtime `schema()`,
     * both at the top level and inside the nested `nested` column.
     *
     * The data frame is deliberately built with columns in a different order than the properties
     * are declared in [Schema] and [Nested].
     */
    @Test
    fun `columns order test`() {
        val row = dataFrameOf("c", "b")(4, 5).first()
        val df = dataFrameOf("abc", "a", "a123", "nested")(1, 2, 3, row).cast<Schema>()

        // kotest reads `actual shouldBe expected`: the value under test goes on the left so that
        // a failure reports the runtime schema as the expectation.
        df.compileTimeSchema().toString() shouldBe df.schema().toString()
    }
}

/**
 * Compile-time schema used by `columns order test`.
 *
 * The properties are declared in a different order (`a`, `abc`, `a123`, `nested`) than the
 * columns of the data frame built in the test (`abc`, `a`, `a123`, `nested`).
 */
private interface Schema {
val a: Int
val abc: Int
val a123: Int
// Nested row column; its own columns are described by [Nested].
val nested: DataRow<Nested>
}

/**
 * Compile-time schema of the `nested` column used by `columns order test`.
 *
 * Declared as `b`, `c`, whereas the row built in the test has its columns in the order `c`, `b`.
 */
private interface Nested {
val b: Int
val c: Int
}

0 comments on commit 72354c5

Please sign in to comment.